From 5129044dce1f85ce4950f31bcf90f3886466f06a Mon Sep 17 00:00:00 2001 From: "C.J. Collier" Date: Tue, 14 Jun 2016 07:54:47 -0700 Subject: Imported upstream release 16.04 * gbp import-orig ../dpdk-16.04.tar.xz Change-Id: Iac2196db782ba322f6974d8a752acc34ce5024c3 Signed-off-by: C.J. Collier --- lib/librte_eal/Makefile | 38 + lib/librte_eal/bsdapp/Makefile | 38 + lib/librte_eal/bsdapp/contigmem/BSDmakefile | 36 + lib/librte_eal/bsdapp/contigmem/Makefile | 52 + lib/librte_eal/bsdapp/contigmem/contigmem.c | 232 + lib/librte_eal/bsdapp/eal/Makefile | 117 + lib/librte_eal/bsdapp/eal/eal.c | 638 ++ lib/librte_eal/bsdapp/eal/eal_alarm.c | 60 + lib/librte_eal/bsdapp/eal/eal_debug.c | 119 + lib/librte_eal/bsdapp/eal/eal_hugepage_info.c | 134 + lib/librte_eal/bsdapp/eal/eal_interrupts.c | 119 + lib/librte_eal/bsdapp/eal/eal_lcore.c | 79 + lib/librte_eal/bsdapp/eal/eal_log.c | 57 + lib/librte_eal/bsdapp/eal/eal_memory.c | 194 + lib/librte_eal/bsdapp/eal/eal_pci.c | 633 ++ lib/librte_eal/bsdapp/eal/eal_thread.c | 201 + lib/librte_eal/bsdapp/eal/eal_timer.c | 94 + .../bsdapp/eal/include/exec-env/rte_dom0_common.h | 107 + .../bsdapp/eal/include/exec-env/rte_interrupts.h | 137 + lib/librte_eal/bsdapp/eal/rte_eal_version.map | 153 + lib/librte_eal/bsdapp/nic_uio/BSDmakefile | 36 + lib/librte_eal/bsdapp/nic_uio/Makefile | 52 + lib/librte_eal/bsdapp/nic_uio/nic_uio.c | 335 + lib/librte_eal/common/Makefile | 61 + lib/librte_eal/common/arch/arm/rte_cpuflags.c | 179 + lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c | 147 + lib/librte_eal/common/arch/tile/rte_cpuflags.c | 47 + lib/librte_eal/common/arch/x86/rte_cpuflags.c | 220 + lib/librte_eal/common/eal_common_cpuflags.c | 76 + lib/librte_eal/common/eal_common_dev.c | 152 + lib/librte_eal/common/eal_common_devargs.c | 176 + lib/librte_eal/common/eal_common_errno.c | 72 + lib/librte_eal/common/eal_common_hexdump.c | 120 + lib/librte_eal/common/eal_common_launch.c | 118 + lib/librte_eal/common/eal_common_lcore.c | 110 + lib/librte_eal/common/eal_common_log.c | 337 + lib/librte_eal/common/eal_common_memory.c | 154 + lib/librte_eal/common/eal_common_memzone.c | 445 + lib/librte_eal/common/eal_common_options.c | 1022 ++ lib/librte_eal/common/eal_common_pci.c | 457 + lib/librte_eal/common/eal_common_pci_uio.c | 222 + lib/librte_eal/common/eal_common_proc.c | 61 + lib/librte_eal/common/eal_common_string_fns.c | 69 + lib/librte_eal/common/eal_common_tailqs.c | 202 + lib/librte_eal/common/eal_common_thread.c | 157 + lib/librte_eal/common/eal_common_timer.c | 86 + lib/librte_eal/common/eal_filesystem.h | 118 + lib/librte_eal/common/eal_hugepages.h | 67 + lib/librte_eal/common/eal_internal_cfg.h | 94 + lib/librte_eal/common/eal_options.h | 100 + lib/librte_eal/common/eal_private.h | 331 + lib/librte_eal/common/eal_thread.h | 100 + .../common/include/arch/arm/rte_atomic.h | 48 + .../common/include/arch/arm/rte_atomic_32.h | 74 + .../common/include/arch/arm/rte_atomic_64.h | 88 + .../common/include/arch/arm/rte_byteorder.h | 107 + .../common/include/arch/arm/rte_cpuflags.h | 42 + .../common/include/arch/arm/rte_cpuflags_32.h | 82 + .../common/include/arch/arm/rte_cpuflags_64.h | 64 + .../common/include/arch/arm/rte_cycles.h | 42 + .../common/include/arch/arm/rte_cycles_32.h | 121 + .../common/include/arch/arm/rte_cycles_64.h | 71 + .../common/include/arch/arm/rte_memcpy.h | 42 + .../common/include/arch/arm/rte_memcpy_32.h | 338 + .../common/include/arch/arm/rte_memcpy_64.h | 93 + .../common/include/arch/arm/rte_prefetch.h | 42 + .../common/include/arch/arm/rte_prefetch_32.h | 67 + .../common/include/arch/arm/rte_prefetch_64.h | 66 + .../common/include/arch/arm/rte_rwlock.h | 40 + .../common/include/arch/arm/rte_spinlock.h | 92 + lib/librte_eal/common/include/arch/arm/rte_vect.h | 83 + .../common/include/arch/ppc_64/rte_atomic.h | 432 + .../common/include/arch/ppc_64/rte_byteorder.h | 149 + .../common/include/arch/ppc_64/rte_cpuflags.h | 88 + .../common/include/arch/ppc_64/rte_cycles.h | 94 + .../common/include/arch/ppc_64/rte_memcpy.h | 225 + .../common/include/arch/ppc_64/rte_prefetch.h | 67 + .../common/include/arch/ppc_64/rte_rwlock.h | 38 + .../common/include/arch/ppc_64/rte_spinlock.h | 114 + .../common/include/arch/tile/rte_atomic.h | 92 + .../common/include/arch/tile/rte_byteorder.h | 91 + .../common/include/arch/tile/rte_cpuflags.h | 53 + .../common/include/arch/tile/rte_cycles.h | 70 + .../common/include/arch/tile/rte_memcpy.h | 93 + .../common/include/arch/tile/rte_prefetch.h | 67 + .../common/include/arch/tile/rte_rwlock.h | 70 + .../common/include/arch/tile/rte_spinlock.h | 92 + .../common/include/arch/x86/rte_atomic.h | 222 + .../common/include/arch/x86/rte_atomic_32.h | 222 + .../common/include/arch/x86/rte_atomic_64.h | 191 + .../common/include/arch/x86/rte_byteorder.h | 125 + .../common/include/arch/x86/rte_byteorder_32.h | 51 + .../common/include/arch/x86/rte_byteorder_64.h | 52 + .../common/include/arch/x86/rte_cpuflags.h | 153 + .../common/include/arch/x86/rte_cycles.h | 121 + .../common/include/arch/x86/rte_memcpy.h | 884 ++ .../common/include/arch/x86/rte_prefetch.h | 67 + lib/librte_eal/common/include/arch/x86/rte_rtm.h | 73 + .../common/include/arch/x86/rte_rwlock.h | 82 + .../common/include/arch/x86/rte_spinlock.h | 201 + lib/librte_eal/common/include/arch/x86/rte_vect.h | 132 + lib/librte_eal/common/include/generic/rte_atomic.h | 945 ++ .../common/include/generic/rte_byteorder.h | 217 + .../common/include/generic/rte_cpuflags.h | 82 + lib/librte_eal/common/include/generic/rte_cycles.h | 205 + lib/librte_eal/common/include/generic/rte_memcpy.h | 144 + .../common/include/generic/rte_prefetch.h | 83 + lib/librte_eal/common/include/generic/rte_rwlock.h | 208 + .../common/include/generic/rte_spinlock.h | 325 + lib/librte_eal/common/include/rte_alarm.h | 106 + .../common/include/rte_branch_prediction.h | 70 + lib/librte_eal/common/include/rte_common.h | 401 + lib/librte_eal/common/include/rte_debug.h | 103 + lib/librte_eal/common/include/rte_dev.h | 192 + lib/librte_eal/common/include/rte_devargs.h | 177 + lib/librte_eal/common/include/rte_eal.h | 259 + lib/librte_eal/common/include/rte_eal_memconfig.h | 100 + lib/librte_eal/common/include/rte_errno.h | 95 + lib/librte_eal/common/include/rte_hexdump.h | 89 + lib/librte_eal/common/include/rte_interrupts.h | 120 + lib/librte_eal/common/include/rte_keepalive.h | 108 + lib/librte_eal/common/include/rte_launch.h | 177 + lib/librte_eal/common/include/rte_lcore.h | 276 + lib/librte_eal/common/include/rte_log.h | 311 + lib/librte_eal/common/include/rte_malloc.h | 342 + lib/librte_eal/common/include/rte_malloc_heap.h | 55 + lib/librte_eal/common/include/rte_memory.h | 263 + lib/librte_eal/common/include/rte_memzone.h | 305 + lib/librte_eal/common/include/rte_pci.h | 598 ++ .../common/include/rte_pci_dev_feature_defs.h | 70 + .../common/include/rte_pci_dev_features.h | 69 + lib/librte_eal/common/include/rte_pci_dev_ids.h | 704 ++ lib/librte_eal/common/include/rte_per_lcore.h | 79 + lib/librte_eal/common/include/rte_random.h | 91 + lib/librte_eal/common/include/rte_string_fns.h | 81 + lib/librte_eal/common/include/rte_tailq.h | 162 + lib/librte_eal/common/include/rte_time.h | 122 + lib/librte_eal/common/include/rte_version.h | 130 + lib/librte_eal/common/include/rte_warnings.h | 84 + lib/librte_eal/common/malloc_elem.c | 344 + lib/librte_eal/common/malloc_elem.h | 192 + lib/librte_eal/common/malloc_heap.c | 236 + lib/librte_eal/common/malloc_heap.h | 70 + lib/librte_eal/common/rte_keepalive.c | 149 + lib/librte_eal/common/rte_malloc.c | 262 + lib/librte_eal/linuxapp/Makefile | 39 + lib/librte_eal/linuxapp/eal/Makefile | 139 + lib/librte_eal/linuxapp/eal/eal.c | 936 ++ lib/librte_eal/linuxapp/eal/eal_alarm.c | 273 + lib/librte_eal/linuxapp/eal/eal_debug.c | 119 + lib/librte_eal/linuxapp/eal/eal_hugepage_info.c | 365 + lib/librte_eal/linuxapp/eal/eal_interrupts.c | 1224 +++ lib/librte_eal/linuxapp/eal/eal_ivshmem.c | 955 ++ lib/librte_eal/linuxapp/eal/eal_lcore.c | 110 + lib/librte_eal/linuxapp/eal/eal_log.c | 146 + lib/librte_eal/linuxapp/eal/eal_memory.c | 1559 +++ lib/librte_eal/linuxapp/eal/eal_pci.c | 762 ++ lib/librte_eal/linuxapp/eal/eal_pci_init.h | 127 + lib/librte_eal/linuxapp/eal/eal_pci_uio.c | 491 + lib/librte_eal/linuxapp/eal/eal_pci_vfio.c | 1097 ++ lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c | 405 + lib/librte_eal/linuxapp/eal/eal_thread.c | 199 + lib/librte_eal/linuxapp/eal/eal_timer.c | 305 + lib/librte_eal/linuxapp/eal/eal_vfio.h | 67 + lib/librte_eal/linuxapp/eal/eal_xen_memory.c | 369 + .../eal/include/exec-env/rte_dom0_common.h | 108 + .../linuxapp/eal/include/exec-env/rte_interrupts.h | 228 + .../linuxapp/eal/include/exec-env/rte_kni_common.h | 172 + lib/librte_eal/linuxapp/eal/rte_eal_version.map | 156 + lib/librte_eal/linuxapp/igb_uio/Makefile | 53 + lib/librte_eal/linuxapp/igb_uio/compat.h | 116 + lib/librte_eal/linuxapp/igb_uio/igb_uio.c | 580 ++ lib/librte_eal/linuxapp/kni/Makefile | 93 + lib/librte_eal/linuxapp/kni/compat.h | 29 + lib/librte_eal/linuxapp/kni/ethtool/README | 100 + lib/librte_eal/linuxapp/kni/ethtool/igb/COPYING | 339 + .../linuxapp/kni/ethtool/igb/e1000_82575.c | 3665 +++++++ .../linuxapp/kni/ethtool/igb/e1000_82575.h | 509 + .../linuxapp/kni/ethtool/igb/e1000_api.c | 1159 +++ .../linuxapp/kni/ethtool/igb/e1000_api.h | 157 + .../linuxapp/kni/ethtool/igb/e1000_defines.h | 1380 +++ lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h | 793 ++ .../linuxapp/kni/ethtool/igb/e1000_i210.c | 909 ++ .../linuxapp/kni/ethtool/igb/e1000_i210.h | 91 + .../linuxapp/kni/ethtool/igb/e1000_mac.c | 2096 ++++ .../linuxapp/kni/ethtool/igb/e1000_mac.h | 80 + .../linuxapp/kni/ethtool/igb/e1000_manage.c | 554 + .../linuxapp/kni/ethtool/igb/e1000_manage.h | 89 + .../linuxapp/kni/ethtool/igb/e1000_mbx.c | 525 + .../linuxapp/kni/ethtool/igb/e1000_mbx.h | 87 + .../linuxapp/kni/ethtool/igb/e1000_nvm.c | 965 ++ .../linuxapp/kni/ethtool/igb/e1000_nvm.h | 75 + .../linuxapp/kni/ethtool/igb/e1000_osdep.h | 136 + .../linuxapp/kni/ethtool/igb/e1000_phy.c | 3405 ++++++ .../linuxapp/kni/ethtool/igb/e1000_phy.h | 256 + .../linuxapp/kni/ethtool/igb/e1000_regs.h | 646 ++ lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h | 859 ++ .../linuxapp/kni/ethtool/igb/igb_debugfs.c | 28 + .../linuxapp/kni/ethtool/igb/igb_ethtool.c | 2857 +++++ .../linuxapp/kni/ethtool/igb/igb_hwmon.c | 260 + lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c | 10291 +++++++++++++++++++ .../linuxapp/kni/ethtool/igb/igb_param.c | 847 ++ .../linuxapp/kni/ethtool/igb/igb_procfs.c | 363 + lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ptp.c | 944 ++ .../linuxapp/kni/ethtool/igb/igb_regtest.h | 249 + lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c | 436 + lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h | 46 + lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.c | 1482 +++ lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h | 3918 +++++++ .../linuxapp/kni/ethtool/igb/kcompat_ethtool.c | 1171 +++ lib/librte_eal/linuxapp/kni/ethtool/ixgbe/COPYING | 339 + lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h | 925 ++ .../linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c | 1296 +++ .../linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h | 44 + .../linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c | 2313 +++++ .../linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h | 58 + .../linuxapp/kni/ethtool/ixgbe/ixgbe_api.c | 1157 +++ .../linuxapp/kni/ethtool/ixgbe/ixgbe_api.h | 168 + .../linuxapp/kni/ethtool/ixgbe/ixgbe_common.c | 4082 ++++++++ .../linuxapp/kni/ethtool/ixgbe/ixgbe_common.h | 140 + .../linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h | 168 + .../linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c | 2901 ++++++ .../linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h | 91 + .../linuxapp/kni/ethtool/ixgbe/ixgbe_main.c | 2970 ++++++ .../linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h | 105 + .../linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h | 132 + .../linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c | 1847 ++++ .../linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h | 137 + .../linuxapp/kni/ethtool/ixgbe/ixgbe_sriov.h | 73 + .../linuxapp/kni/ethtool/ixgbe/ixgbe_type.h | 3254 ++++++ .../linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c | 937 ++ .../linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h | 58 + .../linuxapp/kni/ethtool/ixgbe/kcompat.c | 1246 +++ .../linuxapp/kni/ethtool/ixgbe/kcompat.h | 3143 ++++++ lib/librte_eal/linuxapp/kni/kni_dev.h | 149 + lib/librte_eal/linuxapp/kni/kni_ethtool.c | 217 + lib/librte_eal/linuxapp/kni/kni_fifo.h | 108 + lib/librte_eal/linuxapp/kni/kni_misc.c | 688 ++ lib/librte_eal/linuxapp/kni/kni_net.c | 706 ++ lib/librte_eal/linuxapp/kni/kni_vhost.c | 857 ++ lib/librte_eal/linuxapp/xen_dom0/Makefile | 56 + lib/librte_eal/linuxapp/xen_dom0/compat.h | 15 + lib/librte_eal/linuxapp/xen_dom0/dom0_mm_dev.h | 107 + lib/librte_eal/linuxapp/xen_dom0/dom0_mm_misc.c | 780 ++ 244 files changed, 108631 insertions(+) create mode 100644 lib/librte_eal/Makefile create mode 100644 lib/librte_eal/bsdapp/Makefile create mode 100644 lib/librte_eal/bsdapp/contigmem/BSDmakefile create mode 100644 lib/librte_eal/bsdapp/contigmem/Makefile create mode 100644 lib/librte_eal/bsdapp/contigmem/contigmem.c create mode 100644 lib/librte_eal/bsdapp/eal/Makefile create mode 100644 lib/librte_eal/bsdapp/eal/eal.c create mode 100644 lib/librte_eal/bsdapp/eal/eal_alarm.c create mode 100644 lib/librte_eal/bsdapp/eal/eal_debug.c create mode 100644 lib/librte_eal/bsdapp/eal/eal_hugepage_info.c create mode 100644 lib/librte_eal/bsdapp/eal/eal_interrupts.c create mode 100644 lib/librte_eal/bsdapp/eal/eal_lcore.c create mode 100644 lib/librte_eal/bsdapp/eal/eal_log.c create mode 100644 lib/librte_eal/bsdapp/eal/eal_memory.c create mode 100644 lib/librte_eal/bsdapp/eal/eal_pci.c create mode 100644 lib/librte_eal/bsdapp/eal/eal_thread.c create mode 100644 lib/librte_eal/bsdapp/eal/eal_timer.c create mode 100644 lib/librte_eal/bsdapp/eal/include/exec-env/rte_dom0_common.h create mode 100644 lib/librte_eal/bsdapp/eal/include/exec-env/rte_interrupts.h create mode 100644 lib/librte_eal/bsdapp/eal/rte_eal_version.map create mode 100644 lib/librte_eal/bsdapp/nic_uio/BSDmakefile create mode 100644 lib/librte_eal/bsdapp/nic_uio/Makefile create mode 100644 lib/librte_eal/bsdapp/nic_uio/nic_uio.c create mode 100644 lib/librte_eal/common/Makefile create mode 100644 lib/librte_eal/common/arch/arm/rte_cpuflags.c create mode 100644 lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c create mode 100644 lib/librte_eal/common/arch/tile/rte_cpuflags.c create mode 100644 lib/librte_eal/common/arch/x86/rte_cpuflags.c create mode 100644 lib/librte_eal/common/eal_common_cpuflags.c create mode 100644 lib/librte_eal/common/eal_common_dev.c create mode 100644 lib/librte_eal/common/eal_common_devargs.c create mode 100644 lib/librte_eal/common/eal_common_errno.c create mode 100644 lib/librte_eal/common/eal_common_hexdump.c create mode 100644 lib/librte_eal/common/eal_common_launch.c create mode 100644 lib/librte_eal/common/eal_common_lcore.c create mode 100644 lib/librte_eal/common/eal_common_log.c create mode 100644 lib/librte_eal/common/eal_common_memory.c create mode 100644 lib/librte_eal/common/eal_common_memzone.c create mode 100644 lib/librte_eal/common/eal_common_options.c create mode 100644 lib/librte_eal/common/eal_common_pci.c create mode 100644 lib/librte_eal/common/eal_common_pci_uio.c create mode 100644 lib/librte_eal/common/eal_common_proc.c create mode 100644 lib/librte_eal/common/eal_common_string_fns.c create mode 100644 lib/librte_eal/common/eal_common_tailqs.c create mode 100644 lib/librte_eal/common/eal_common_thread.c create mode 100644 lib/librte_eal/common/eal_common_timer.c create mode 100644 lib/librte_eal/common/eal_filesystem.h create mode 100644 lib/librte_eal/common/eal_hugepages.h create mode 100644 lib/librte_eal/common/eal_internal_cfg.h create mode 100644 lib/librte_eal/common/eal_options.h create mode 100644 lib/librte_eal/common/eal_private.h create mode 100644 lib/librte_eal/common/eal_thread.h create mode 100644 lib/librte_eal/common/include/arch/arm/rte_atomic.h create mode 100644 lib/librte_eal/common/include/arch/arm/rte_atomic_32.h create mode 100644 lib/librte_eal/common/include/arch/arm/rte_atomic_64.h create mode 100644 lib/librte_eal/common/include/arch/arm/rte_byteorder.h create mode 100644 lib/librte_eal/common/include/arch/arm/rte_cpuflags.h create mode 100644 lib/librte_eal/common/include/arch/arm/rte_cpuflags_32.h create mode 100644 lib/librte_eal/common/include/arch/arm/rte_cpuflags_64.h create mode 100644 lib/librte_eal/common/include/arch/arm/rte_cycles.h create mode 100644 lib/librte_eal/common/include/arch/arm/rte_cycles_32.h create mode 100644 lib/librte_eal/common/include/arch/arm/rte_cycles_64.h create mode 100644 lib/librte_eal/common/include/arch/arm/rte_memcpy.h create mode 100644 lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h create mode 100644 lib/librte_eal/common/include/arch/arm/rte_memcpy_64.h create mode 100644 lib/librte_eal/common/include/arch/arm/rte_prefetch.h create mode 100644 lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h create mode 100644 lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h create mode 100644 lib/librte_eal/common/include/arch/arm/rte_rwlock.h create mode 100644 lib/librte_eal/common/include/arch/arm/rte_spinlock.h create mode 100644 lib/librte_eal/common/include/arch/arm/rte_vect.h create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_byteorder.h create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_cpuflags.h create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_rwlock.h create mode 100644 lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h create mode 100644 lib/librte_eal/common/include/arch/tile/rte_atomic.h create mode 100644 lib/librte_eal/common/include/arch/tile/rte_byteorder.h create mode 100644 lib/librte_eal/common/include/arch/tile/rte_cpuflags.h create mode 100644 lib/librte_eal/common/include/arch/tile/rte_cycles.h create mode 100644 lib/librte_eal/common/include/arch/tile/rte_memcpy.h create mode 100644 lib/librte_eal/common/include/arch/tile/rte_prefetch.h create mode 100644 lib/librte_eal/common/include/arch/tile/rte_rwlock.h create mode 100644 lib/librte_eal/common/include/arch/tile/rte_spinlock.h create mode 100644 lib/librte_eal/common/include/arch/x86/rte_atomic.h create mode 100644 lib/librte_eal/common/include/arch/x86/rte_atomic_32.h create mode 100644 lib/librte_eal/common/include/arch/x86/rte_atomic_64.h create mode 100644 lib/librte_eal/common/include/arch/x86/rte_byteorder.h create mode 100644 lib/librte_eal/common/include/arch/x86/rte_byteorder_32.h create mode 100644 lib/librte_eal/common/include/arch/x86/rte_byteorder_64.h create mode 100644 lib/librte_eal/common/include/arch/x86/rte_cpuflags.h create mode 100644 lib/librte_eal/common/include/arch/x86/rte_cycles.h create mode 100644 lib/librte_eal/common/include/arch/x86/rte_memcpy.h create mode 100644 lib/librte_eal/common/include/arch/x86/rte_prefetch.h create mode 100644 lib/librte_eal/common/include/arch/x86/rte_rtm.h create mode 100644 lib/librte_eal/common/include/arch/x86/rte_rwlock.h create mode 100644 lib/librte_eal/common/include/arch/x86/rte_spinlock.h create mode 100644 lib/librte_eal/common/include/arch/x86/rte_vect.h create mode 100644 lib/librte_eal/common/include/generic/rte_atomic.h create mode 100644 lib/librte_eal/common/include/generic/rte_byteorder.h create mode 100644 lib/librte_eal/common/include/generic/rte_cpuflags.h create mode 100644 lib/librte_eal/common/include/generic/rte_cycles.h create mode 100644 lib/librte_eal/common/include/generic/rte_memcpy.h create mode 100644 lib/librte_eal/common/include/generic/rte_prefetch.h create mode 100644 lib/librte_eal/common/include/generic/rte_rwlock.h create mode 100644 lib/librte_eal/common/include/generic/rte_spinlock.h create mode 100644 lib/librte_eal/common/include/rte_alarm.h create mode 100644 lib/librte_eal/common/include/rte_branch_prediction.h create mode 100644 lib/librte_eal/common/include/rte_common.h create mode 100644 lib/librte_eal/common/include/rte_debug.h create mode 100644 lib/librte_eal/common/include/rte_dev.h create mode 100644 lib/librte_eal/common/include/rte_devargs.h create mode 100644 lib/librte_eal/common/include/rte_eal.h create mode 100644 lib/librte_eal/common/include/rte_eal_memconfig.h create mode 100644 lib/librte_eal/common/include/rte_errno.h create mode 100644 lib/librte_eal/common/include/rte_hexdump.h create mode 100644 lib/librte_eal/common/include/rte_interrupts.h create mode 100644 lib/librte_eal/common/include/rte_keepalive.h create mode 100644 lib/librte_eal/common/include/rte_launch.h create mode 100644 lib/librte_eal/common/include/rte_lcore.h create mode 100644 lib/librte_eal/common/include/rte_log.h create mode 100644 lib/librte_eal/common/include/rte_malloc.h create mode 100644 lib/librte_eal/common/include/rte_malloc_heap.h create mode 100644 lib/librte_eal/common/include/rte_memory.h create mode 100644 lib/librte_eal/common/include/rte_memzone.h create mode 100644 lib/librte_eal/common/include/rte_pci.h create mode 100644 lib/librte_eal/common/include/rte_pci_dev_feature_defs.h create mode 100644 lib/librte_eal/common/include/rte_pci_dev_features.h create mode 100644 lib/librte_eal/common/include/rte_pci_dev_ids.h create mode 100644 lib/librte_eal/common/include/rte_per_lcore.h create mode 100644 lib/librte_eal/common/include/rte_random.h create mode 100644 lib/librte_eal/common/include/rte_string_fns.h create mode 100644 lib/librte_eal/common/include/rte_tailq.h create mode 100644 lib/librte_eal/common/include/rte_time.h create mode 100644 lib/librte_eal/common/include/rte_version.h create mode 100644 lib/librte_eal/common/include/rte_warnings.h create mode 100644 lib/librte_eal/common/malloc_elem.c create mode 100644 lib/librte_eal/common/malloc_elem.h create mode 100644 lib/librte_eal/common/malloc_heap.c create mode 100644 lib/librte_eal/common/malloc_heap.h create mode 100644 lib/librte_eal/common/rte_keepalive.c create mode 100644 lib/librte_eal/common/rte_malloc.c create mode 100644 lib/librte_eal/linuxapp/Makefile create mode 100644 lib/librte_eal/linuxapp/eal/Makefile create mode 100644 lib/librte_eal/linuxapp/eal/eal.c create mode 100644 lib/librte_eal/linuxapp/eal/eal_alarm.c create mode 100644 lib/librte_eal/linuxapp/eal/eal_debug.c create mode 100644 lib/librte_eal/linuxapp/eal/eal_hugepage_info.c create mode 100644 lib/librte_eal/linuxapp/eal/eal_interrupts.c create mode 100644 lib/librte_eal/linuxapp/eal/eal_ivshmem.c create mode 100644 lib/librte_eal/linuxapp/eal/eal_lcore.c create mode 100644 lib/librte_eal/linuxapp/eal/eal_log.c create mode 100644 lib/librte_eal/linuxapp/eal/eal_memory.c create mode 100644 lib/librte_eal/linuxapp/eal/eal_pci.c create mode 100644 lib/librte_eal/linuxapp/eal/eal_pci_init.h create mode 100644 lib/librte_eal/linuxapp/eal/eal_pci_uio.c create mode 100644 lib/librte_eal/linuxapp/eal/eal_pci_vfio.c create mode 100644 lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c create mode 100644 lib/librte_eal/linuxapp/eal/eal_thread.c create mode 100644 lib/librte_eal/linuxapp/eal/eal_timer.c create mode 100644 lib/librte_eal/linuxapp/eal/eal_vfio.h create mode 100644 lib/librte_eal/linuxapp/eal/eal_xen_memory.c create mode 100644 lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h create mode 100644 lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h create mode 100644 lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h create mode 100644 lib/librte_eal/linuxapp/eal/rte_eal_version.map create mode 100644 lib/librte_eal/linuxapp/igb_uio/Makefile create mode 100644 lib/librte_eal/linuxapp/igb_uio/compat.h create mode 100644 lib/librte_eal/linuxapp/igb_uio/igb_uio.c create mode 100644 lib/librte_eal/linuxapp/kni/Makefile create mode 100644 lib/librte_eal/linuxapp/kni/compat.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/README create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/COPYING create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_defines.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_osdep.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_regs.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/igb_debugfs.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/igb_hwmon.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/igb_param.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/igb_procfs.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ptp.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/igb_regtest.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat_ethtool.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/COPYING create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_sriov.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_type.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.c create mode 100644 lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h create mode 100644 lib/librte_eal/linuxapp/kni/kni_dev.h create mode 100644 lib/librte_eal/linuxapp/kni/kni_ethtool.c create mode 100644 lib/librte_eal/linuxapp/kni/kni_fifo.h create mode 100644 lib/librte_eal/linuxapp/kni/kni_misc.c create mode 100644 lib/librte_eal/linuxapp/kni/kni_net.c create mode 100644 lib/librte_eal/linuxapp/kni/kni_vhost.c create mode 100644 lib/librte_eal/linuxapp/xen_dom0/Makefile create mode 100644 lib/librte_eal/linuxapp/xen_dom0/compat.h create mode 100644 lib/librte_eal/linuxapp/xen_dom0/dom0_mm_dev.h create mode 100644 lib/librte_eal/linuxapp/xen_dom0/dom0_mm_misc.c (limited to 'lib/librte_eal') diff --git a/lib/librte_eal/Makefile b/lib/librte_eal/Makefile new file mode 100644 index 00000000..cf11a099 --- /dev/null +++ b/lib/librte_eal/Makefile @@ -0,0 +1,38 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +DIRS-y += common +DIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += linuxapp +DIRS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += bsdapp + +include $(RTE_SDK)/mk/rte.subdir.mk diff --git a/lib/librte_eal/bsdapp/Makefile b/lib/librte_eal/bsdapp/Makefile new file mode 100644 index 00000000..0e6e2be9 --- /dev/null +++ b/lib/librte_eal/bsdapp/Makefile @@ -0,0 +1,38 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +DIRS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal +DIRS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += contigmem +DIRS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += nic_uio + +include $(RTE_SDK)/mk/rte.subdir.mk diff --git a/lib/librte_eal/bsdapp/contigmem/BSDmakefile b/lib/librte_eal/bsdapp/contigmem/BSDmakefile new file mode 100644 index 00000000..f64374c6 --- /dev/null +++ b/lib/librte_eal/bsdapp/contigmem/BSDmakefile @@ -0,0 +1,36 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +KMOD= contigmem +SRCS= contigmem.c device_if.h bus_if.h + +.include diff --git a/lib/librte_eal/bsdapp/contigmem/Makefile b/lib/librte_eal/bsdapp/contigmem/Makefile new file mode 100644 index 00000000..bab005fd --- /dev/null +++ b/lib/librte_eal/bsdapp/contigmem/Makefile @@ -0,0 +1,52 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# +# module name and path +# +MODULE = contigmem + +# +# CFLAGS +# +MODULE_CFLAGS += -I$(SRCDIR) +MODULE_CFLAGS += -I$(RTE_OUTPUT)/include +MODULE_CFLAGS += -Winline -Wall -Werror +MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h + +# +# all source are stored in SRCS-y +# +SRCS-y := contigmem.c + +include $(RTE_SDK)/mk/rte.bsdmodule.mk diff --git a/lib/librte_eal/bsdapp/contigmem/contigmem.c b/lib/librte_eal/bsdapp/contigmem/contigmem.c new file mode 100644 index 00000000..c6ca3b9c --- /dev/null +++ b/lib/librte_eal/bsdapp/contigmem/contigmem.c @@ -0,0 +1,232 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +static int contigmem_load(void); +static int contigmem_unload(void); +static int contigmem_physaddr(SYSCTL_HANDLER_ARGS); + +static d_mmap_t contigmem_mmap; +static d_mmap_single_t contigmem_mmap_single; +static d_open_t contigmem_open; + +static int contigmem_num_buffers = RTE_CONTIGMEM_DEFAULT_NUM_BUFS; +static int64_t contigmem_buffer_size = RTE_CONTIGMEM_DEFAULT_BUF_SIZE; + +static eventhandler_tag contigmem_eh_tag; +static void *contigmem_buffers[RTE_CONTIGMEM_MAX_NUM_BUFS]; +static struct cdev *contigmem_cdev = NULL; + +TUNABLE_INT("hw.contigmem.num_buffers", &contigmem_num_buffers); +TUNABLE_QUAD("hw.contigmem.buffer_size", &contigmem_buffer_size); + +static SYSCTL_NODE(_hw, OID_AUTO, contigmem, CTLFLAG_RD, 0, "contigmem"); + +SYSCTL_INT(_hw_contigmem, OID_AUTO, num_buffers, CTLFLAG_RD, + &contigmem_num_buffers, 0, "Number of contigmem buffers allocated"); +SYSCTL_QUAD(_hw_contigmem, OID_AUTO, buffer_size, CTLFLAG_RD, + &contigmem_buffer_size, 0, "Size of each contiguous buffer"); + +static SYSCTL_NODE(_hw_contigmem, OID_AUTO, physaddr, CTLFLAG_RD, 0, + "physaddr"); + +MALLOC_DEFINE(M_CONTIGMEM, "contigmem", "contigmem(4) allocations"); + +static int contigmem_modevent(module_t mod, int type, void *arg) +{ + int error = 0; + + switch (type) { + case MOD_LOAD: + error = contigmem_load(); + break; + case MOD_UNLOAD: + error = contigmem_unload(); + break; + default: + break; + } + + return error; +} + +moduledata_t contigmem_mod = { + "contigmem", + (modeventhand_t)contigmem_modevent, + 0 +}; + +DECLARE_MODULE(contigmem, contigmem_mod, SI_SUB_DRIVERS, SI_ORDER_ANY); +MODULE_VERSION(contigmem, 1); + +static struct cdevsw contigmem_ops = { + .d_name = "contigmem", + .d_version = D_VERSION, + .d_mmap = contigmem_mmap, + .d_mmap_single = contigmem_mmap_single, + .d_open = contigmem_open, +}; + +static int +contigmem_load() +{ + char index_string[8], description[32]; + int i; + + if (contigmem_num_buffers > RTE_CONTIGMEM_MAX_NUM_BUFS) { + printf("%d buffers requested is greater than %d allowed\n", + contigmem_num_buffers, RTE_CONTIGMEM_MAX_NUM_BUFS); + return EINVAL; + } + + if (contigmem_buffer_size < PAGE_SIZE || + (contigmem_buffer_size & (contigmem_buffer_size - 1)) != 0) { + printf("buffer size 0x%lx is not greater than PAGE_SIZE and " + "power of two\n", contigmem_buffer_size); + return EINVAL; + } + + for (i = 0; i < contigmem_num_buffers; i++) { + contigmem_buffers[i] = + contigmalloc(contigmem_buffer_size, M_CONTIGMEM, M_ZERO, 0, + BUS_SPACE_MAXADDR, contigmem_buffer_size, 0); + + if (contigmem_buffers[i] == NULL) { + printf("contigmalloc failed for buffer %d\n", i); + return ENOMEM; + } + + printf("%2u: virt=%p phys=%p\n", i, contigmem_buffers[i], + (void *)pmap_kextract((vm_offset_t)contigmem_buffers[i])); + + snprintf(index_string, sizeof(index_string), "%d", i); + snprintf(description, sizeof(description), + "phys addr for buffer %d", i); + SYSCTL_ADD_PROC(NULL, + &SYSCTL_NODE_CHILDREN(_hw_contigmem, physaddr), OID_AUTO, + index_string, CTLTYPE_U64 | CTLFLAG_RD, + (void *)(uintptr_t)i, 0, contigmem_physaddr, "LU", + description); + } + + contigmem_cdev = make_dev_credf(0, &contigmem_ops, 0, NULL, UID_ROOT, + GID_WHEEL, 0600, "contigmem"); + + return 0; +} + +static int +contigmem_unload() +{ + int i; + + if (contigmem_cdev != NULL) + destroy_dev(contigmem_cdev); + + if (contigmem_eh_tag != NULL) + EVENTHANDLER_DEREGISTER(process_exit, contigmem_eh_tag); + + for (i = 0; i < RTE_CONTIGMEM_MAX_NUM_BUFS; i++) + if (contigmem_buffers[i] != NULL) + contigfree(contigmem_buffers[i], contigmem_buffer_size, + M_CONTIGMEM); + + return 0; +} + +static int +contigmem_physaddr(SYSCTL_HANDLER_ARGS) +{ + uint64_t physaddr; + int index = (int)(uintptr_t)arg1; + + physaddr = (uint64_t)vtophys(contigmem_buffers[index]); + return sysctl_handle_64(oidp, &physaddr, 0, req); +} + +static int +contigmem_open(struct cdev *cdev, int fflags, int devtype, + struct thread *td) +{ + return 0; +} + +static int +contigmem_mmap(struct cdev *cdev, vm_ooffset_t offset, vm_paddr_t *paddr, + int prot, vm_memattr_t *memattr) +{ + + *paddr = offset; + return 0; +} + +static int +contigmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size, + struct vm_object **obj, int nprot) +{ + /* + * The buffer index is encoded in the offset. Divide the offset by + * PAGE_SIZE to get the index of the buffer requested by the user + * app. + */ + if ((*offset/PAGE_SIZE) >= contigmem_num_buffers) + return EINVAL; + + *offset = (vm_ooffset_t)vtophys(contigmem_buffers[*offset/PAGE_SIZE]); + *obj = vm_pager_allocate(OBJT_DEVICE, cdev, size, nprot, *offset, + curthread->td_ucred); + + return 0; +} diff --git a/lib/librte_eal/bsdapp/eal/Makefile b/lib/librte_eal/bsdapp/eal/Makefile new file mode 100644 index 00000000..9054ad61 --- /dev/null +++ b/lib/librte_eal/bsdapp/eal/Makefile @@ -0,0 +1,117 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2015 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +LIB = librte_eal.a + +ARCH_DIR ?= $(RTE_ARCH) +VPATH += $(RTE_SDK)/lib/librte_eal/common +VPATH += $(RTE_SDK)/lib/librte_eal/common/arch/$(ARCH_DIR) + +CFLAGS += -I$(SRCDIR)/include +CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common +CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common/include +CFLAGS += -I$(RTE_SDK)/lib/librte_ring +CFLAGS += -I$(RTE_SDK)/lib/librte_mempool +CFLAGS += $(WERROR_FLAGS) -O3 + +LDLIBS += -lexecinfo +LDLIBS += -lpthread +LDLIBS += -lgcc_s + +EXPORT_MAP := rte_eal_version.map + +LIBABIVER := 2 + +# specific to linuxapp exec-env +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) := eal.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_memory.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_hugepage_info.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_thread.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_log.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_pci.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_debug.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_lcore.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_timer.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_interrupts.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_alarm.c + +# from common dir +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_lcore.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_timer.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_memzone.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_log.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_launch.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_pci.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_pci_uio.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_memory.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_tailqs.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_errno.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_cpuflags.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_string_fns.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_hexdump.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_devargs.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_dev.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_options.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_thread.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += eal_common_proc.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_malloc.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_elem.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += malloc_heap.c +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_keepalive.c + +# from arch dir +SRCS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += rte_cpuflags.c + +CFLAGS_eal_common_cpuflags.o := $(CPUFLAGS_LIST) + +CFLAGS_eal.o := -D_GNU_SOURCE +#CFLAGS_eal_thread.o := -D_GNU_SOURCE +CFLAGS_eal_log.o := -D_GNU_SOURCE +CFLAGS_eal_common_log.o := -D_GNU_SOURCE + +# workaround for a gcc bug with noreturn attribute +# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603 +ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) +CFLAGS_eal_thread.o += -Wno-return-type +CFLAGS_eal_hpet.o += -Wno-return-type +endif + +INC := rte_interrupts.h + +SYMLINK-$(CONFIG_RTE_EXEC_ENV_BSDAPP)-include/exec-env := \ + $(addprefix include/exec-env/,$(INC)) + +DEPDIRS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += lib/librte_eal/common +DEPDIRS-$(CONFIG_RTE_EXEC_ENV_BSDAPP) += lib/librte_eal/common/arch/$(ARCH_DIR) + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_eal/bsdapp/eal/eal.c b/lib/librte_eal/bsdapp/eal/eal.c new file mode 100644 index 00000000..06bfd4e0 --- /dev/null +++ b/lib/librte_eal/bsdapp/eal/eal.c @@ -0,0 +1,638 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * Copyright(c) 2014 6WIND S.A. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "eal_private.h" +#include "eal_thread.h" +#include "eal_internal_cfg.h" +#include "eal_filesystem.h" +#include "eal_hugepages.h" +#include "eal_options.h" + +#define MEMSIZE_IF_NO_HUGE_PAGE (64ULL * 1024ULL * 1024ULL) + +/* Allow the application to print its usage message too if set */ +static rte_usage_hook_t rte_application_usage_hook = NULL; +/* early configuration structure, when memory config is not mmapped */ +static struct rte_mem_config early_mem_config; + +/* define fd variable here, because file needs to be kept open for the + * duration of the program, as we hold a write lock on it in the primary proc */ +static int mem_cfg_fd = -1; + +static struct flock wr_lock = { + .l_type = F_WRLCK, + .l_whence = SEEK_SET, + .l_start = offsetof(struct rte_mem_config, memseg), + .l_len = sizeof(early_mem_config.memseg), +}; + +/* Address of global and public configuration */ +static struct rte_config rte_config = { + .mem_config = &early_mem_config, +}; + +/* internal configuration (per-core) */ +struct lcore_config lcore_config[RTE_MAX_LCORE]; + +/* internal configuration */ +struct internal_config internal_config; + +/* used by rte_rdtsc() */ +int rte_cycles_vmware_tsc_map; + +/* Return a pointer to the configuration structure */ +struct rte_config * +rte_eal_get_configuration(void) +{ + return &rte_config; +} + +/* parse a sysfs (or other) file containing one integer value */ +int +eal_parse_sysfs_value(const char *filename, unsigned long *val) +{ + FILE *f; + char buf[BUFSIZ]; + char *end = NULL; + + if ((f = fopen(filename, "r")) == NULL) { + RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n", + __func__, filename); + return -1; + } + + if (fgets(buf, sizeof(buf), f) == NULL) { + RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n", + __func__, filename); + fclose(f); + return -1; + } + *val = strtoul(buf, &end, 0); + if ((buf[0] == '\0') || (end == NULL) || (*end != '\n')) { + RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs value %s\n", + __func__, filename); + fclose(f); + return -1; + } + fclose(f); + return 0; +} + + +/* create memory configuration in shared/mmap memory. Take out + * a write lock on the memsegs, so we can auto-detect primary/secondary. + * This means we never close the file while running (auto-close on exit). + * We also don't lock the whole file, so that in future we can use read-locks + * on other parts, e.g. memzones, to detect if there are running secondary + * processes. */ +static void +rte_eal_config_create(void) +{ + void *rte_mem_cfg_addr; + int retval; + + const char *pathname = eal_runtime_config_path(); + + if (internal_config.no_shconf) + return; + + if (mem_cfg_fd < 0){ + mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0660); + if (mem_cfg_fd < 0) + rte_panic("Cannot open '%s' for rte_mem_config\n", pathname); + } + + retval = ftruncate(mem_cfg_fd, sizeof(*rte_config.mem_config)); + if (retval < 0){ + close(mem_cfg_fd); + rte_panic("Cannot resize '%s' for rte_mem_config\n", pathname); + } + + retval = fcntl(mem_cfg_fd, F_SETLK, &wr_lock); + if (retval < 0){ + close(mem_cfg_fd); + rte_exit(EXIT_FAILURE, "Cannot create lock on '%s'. Is another primary " + "process running?\n", pathname); + } + + rte_mem_cfg_addr = mmap(NULL, sizeof(*rte_config.mem_config), + PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0); + + if (rte_mem_cfg_addr == MAP_FAILED){ + rte_panic("Cannot mmap memory for rte_config\n"); + } + memcpy(rte_mem_cfg_addr, &early_mem_config, sizeof(early_mem_config)); + rte_config.mem_config = (struct rte_mem_config *) rte_mem_cfg_addr; +} + +/* attach to an existing shared memory config */ +static void +rte_eal_config_attach(void) +{ + void *rte_mem_cfg_addr; + const char *pathname = eal_runtime_config_path(); + + if (internal_config.no_shconf) + return; + + if (mem_cfg_fd < 0){ + mem_cfg_fd = open(pathname, O_RDWR); + if (mem_cfg_fd < 0) + rte_panic("Cannot open '%s' for rte_mem_config\n", pathname); + } + + rte_mem_cfg_addr = mmap(NULL, sizeof(*rte_config.mem_config), + PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0); + close(mem_cfg_fd); + if (rte_mem_cfg_addr == MAP_FAILED) + rte_panic("Cannot mmap memory for rte_config\n"); + + rte_config.mem_config = (struct rte_mem_config *) rte_mem_cfg_addr; +} + +/* Detect if we are a primary or a secondary process */ +enum rte_proc_type_t +eal_proc_type_detect(void) +{ + enum rte_proc_type_t ptype = RTE_PROC_PRIMARY; + const char *pathname = eal_runtime_config_path(); + + /* if we can open the file but not get a write-lock we are a secondary + * process. NOTE: if we get a file handle back, we keep that open + * and don't close it to prevent a race condition between multiple opens */ + if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) && + (fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0)) + ptype = RTE_PROC_SECONDARY; + + RTE_LOG(INFO, EAL, "Auto-detected process type: %s\n", + ptype == RTE_PROC_PRIMARY ? "PRIMARY" : "SECONDARY"); + + return ptype; +} + +/* Sets up rte_config structure with the pointer to shared memory config.*/ +static void +rte_config_init(void) +{ + rte_config.process_type = internal_config.process_type; + + switch (rte_config.process_type){ + case RTE_PROC_PRIMARY: + rte_eal_config_create(); + break; + case RTE_PROC_SECONDARY: + rte_eal_config_attach(); + rte_eal_mcfg_wait_complete(rte_config.mem_config); + break; + case RTE_PROC_AUTO: + case RTE_PROC_INVALID: + rte_panic("Invalid process type\n"); + } +} + +/* display usage */ +static void +eal_usage(const char *prgname) +{ + printf("\nUsage: %s ", prgname); + eal_common_usage(); + /* Allow the application to print its usage message too if hook is set */ + if ( rte_application_usage_hook ) { + printf("===== Application Usage =====\n\n"); + rte_application_usage_hook(prgname); + } +} + +/* Set a per-application usage message */ +rte_usage_hook_t +rte_set_application_usage_hook( rte_usage_hook_t usage_func ) +{ + rte_usage_hook_t old_func; + + /* Will be NULL on the first call to denote the last usage routine. */ + old_func = rte_application_usage_hook; + rte_application_usage_hook = usage_func; + + return old_func; +} + +static inline size_t +eal_get_hugepage_mem_size(void) +{ + uint64_t size = 0; + unsigned i, j; + + for (i = 0; i < internal_config.num_hugepage_sizes; i++) { + struct hugepage_info *hpi = &internal_config.hugepage_info[i]; + if (hpi->hugedir != NULL) { + for (j = 0; j < RTE_MAX_NUMA_NODES; j++) { + size += hpi->hugepage_sz * hpi->num_pages[j]; + } + } + } + + return (size < SIZE_MAX) ? (size_t)(size) : SIZE_MAX; +} + +/* Parse the arguments for --log-level only */ +static void +eal_log_level_parse(int argc, char **argv) +{ + int opt; + char **argvopt; + int option_index; + const int old_optind = optind; + const int old_optopt = optopt; + const int old_optreset = optreset; + char * const old_optarg = optarg; + + argvopt = argv; + optind = 1; + optreset = 1; + + eal_reset_internal_config(&internal_config); + + while ((opt = getopt_long(argc, argvopt, eal_short_options, + eal_long_options, &option_index)) != EOF) { + + int ret; + + /* getopt is not happy, stop right now */ + if (opt == '?') + break; + + ret = (opt == OPT_LOG_LEVEL_NUM) ? + eal_parse_common_option(opt, optarg, &internal_config) : 0; + + /* common parser is not happy */ + if (ret < 0) + break; + } + + /* restore getopt lib */ + optind = old_optind; + optopt = old_optopt; + optreset = old_optreset; + optarg = old_optarg; +} + +/* Parse the argument given in the command line of the application */ +static int +eal_parse_args(int argc, char **argv) +{ + int opt, ret; + char **argvopt; + int option_index; + char *prgname = argv[0]; + const int old_optind = optind; + const int old_optopt = optopt; + const int old_optreset = optreset; + char * const old_optarg = optarg; + + argvopt = argv; + optind = 1; + optreset = 1; + + while ((opt = getopt_long(argc, argvopt, eal_short_options, + eal_long_options, &option_index)) != EOF) { + + /* getopt is not happy, stop right now */ + if (opt == '?') { + eal_usage(prgname); + ret = -1; + goto out; + } + + ret = eal_parse_common_option(opt, optarg, &internal_config); + /* common parser is not happy */ + if (ret < 0) { + eal_usage(prgname); + ret = -1; + goto out; + } + /* common parser handled this option */ + if (ret == 0) + continue; + + switch (opt) { + case 'h': + eal_usage(prgname); + exit(EXIT_SUCCESS); + default: + if (opt < OPT_LONG_MIN_NUM && isprint(opt)) { + RTE_LOG(ERR, EAL, "Option %c is not supported " + "on FreeBSD\n", opt); + } else if (opt >= OPT_LONG_MIN_NUM && + opt < OPT_LONG_MAX_NUM) { + RTE_LOG(ERR, EAL, "Option %s is not supported " + "on FreeBSD\n", + eal_long_options[option_index].name); + } else { + RTE_LOG(ERR, EAL, "Option %d is not supported " + "on FreeBSD\n", opt); + } + eal_usage(prgname); + ret = -1; + goto out; + } + } + + if (eal_adjust_config(&internal_config) != 0) { + ret = -1; + goto out; + } + + /* sanity checks */ + if (eal_check_common_options(&internal_config) != 0) { + eal_usage(prgname); + ret = -1; + goto out; + } + + if (optind >= 0) + argv[optind-1] = prgname; + ret = optind-1; + +out: + /* restore getopt lib */ + optind = old_optind; + optopt = old_optopt; + optreset = old_optreset; + optarg = old_optarg; + + return ret; +} + +static void +eal_check_mem_on_local_socket(void) +{ + const struct rte_memseg *ms; + int i, socket_id; + + socket_id = rte_lcore_to_socket_id(rte_config.master_lcore); + + ms = rte_eal_get_physmem_layout(); + + for (i = 0; i < RTE_MAX_MEMSEG; i++) + if (ms[i].socket_id == socket_id && + ms[i].len > 0) + return; + + RTE_LOG(WARNING, EAL, "WARNING: Master core has no " + "memory on local socket!\n"); +} + +static int +sync_func(__attribute__((unused)) void *arg) +{ + return 0; +} + +inline static void +rte_eal_mcfg_complete(void) +{ + /* ALL shared mem_config related INIT DONE */ + if (rte_config.process_type == RTE_PROC_PRIMARY) + rte_config.mem_config->magic = RTE_MAGIC; +} + +/* return non-zero if hugepages are enabled. */ +int rte_eal_has_hugepages(void) +{ + return !internal_config.no_hugetlbfs; +} + +/* Abstraction for port I/0 privilege */ +int +rte_eal_iopl_init(void) +{ + static int fd; + + fd = open("/dev/io", O_RDWR); + if (fd < 0) + return -1; + /* keep fd open for iopl */ + return 0; +} + +/* Launch threads, called at application init(). */ +int +rte_eal_init(int argc, char **argv) +{ + int i, fctret, ret; + pthread_t thread_id; + static rte_atomic32_t run_once = RTE_ATOMIC32_INIT(0); + char cpuset[RTE_CPU_AFFINITY_STR_LEN]; + char thread_name[RTE_MAX_THREAD_NAME_LEN]; + + if (!rte_atomic32_test_and_set(&run_once)) + return -1; + + thread_id = pthread_self(); + + if (rte_eal_log_early_init() < 0) + rte_panic("Cannot init early logs\n"); + + eal_log_level_parse(argc, argv); + + /* set log level as early as possible */ + rte_set_log_level(internal_config.log_level); + + if (rte_eal_cpu_init() < 0) + rte_panic("Cannot detect lcores\n"); + + fctret = eal_parse_args(argc, argv); + if (fctret < 0) + exit(1); + + if (internal_config.no_hugetlbfs == 0 && + internal_config.process_type != RTE_PROC_SECONDARY && + eal_hugepage_info_init() < 0) + rte_panic("Cannot get hugepage information\n"); + + if (internal_config.memory == 0 && internal_config.force_sockets == 0) { + if (internal_config.no_hugetlbfs) + internal_config.memory = MEMSIZE_IF_NO_HUGE_PAGE; + else + internal_config.memory = eal_get_hugepage_mem_size(); + } + + if (internal_config.vmware_tsc_map == 1) { +#ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT + rte_cycles_vmware_tsc_map = 1; + RTE_LOG (DEBUG, EAL, "Using VMWARE TSC MAP, " + "you must have monitor_control.pseudo_perfctr = TRUE\n"); +#else + RTE_LOG (WARNING, EAL, "Ignoring --vmware-tsc-map because " + "RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT is not set\n"); +#endif + } + + rte_srand(rte_rdtsc()); + + rte_config_init(); + + if (rte_eal_memory_init() < 0) + rte_panic("Cannot init memory\n"); + + if (rte_eal_memzone_init() < 0) + rte_panic("Cannot init memzone\n"); + + if (rte_eal_tailqs_init() < 0) + rte_panic("Cannot init tail queues for objects\n"); + +/* if (rte_eal_log_init(argv[0], internal_config.syslog_facility) < 0) + rte_panic("Cannot init logs\n");*/ + + if (rte_eal_alarm_init() < 0) + rte_panic("Cannot init interrupt-handling thread\n"); + + if (rte_eal_intr_init() < 0) + rte_panic("Cannot init interrupt-handling thread\n"); + + if (rte_eal_timer_init() < 0) + rte_panic("Cannot init HPET or TSC timers\n"); + + if (rte_eal_pci_init() < 0) + rte_panic("Cannot init PCI\n"); + + eal_check_mem_on_local_socket(); + + if (eal_plugins_init() < 0) + rte_panic("Cannot init plugins\n"); + + eal_thread_init_master(rte_config.master_lcore); + + ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN); + + RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%p;cpuset=[%s%s])\n", + rte_config.master_lcore, thread_id, cpuset, + ret == 0 ? "" : "..."); + + if (rte_eal_dev_init() < 0) + rte_panic("Cannot init pmd devices\n"); + + RTE_LCORE_FOREACH_SLAVE(i) { + + /* + * create communication pipes between master thread + * and children + */ + if (pipe(lcore_config[i].pipe_master2slave) < 0) + rte_panic("Cannot create pipe\n"); + if (pipe(lcore_config[i].pipe_slave2master) < 0) + rte_panic("Cannot create pipe\n"); + + lcore_config[i].state = WAIT; + + /* create a thread for each lcore */ + ret = pthread_create(&lcore_config[i].thread_id, NULL, + eal_thread_loop, NULL); + if (ret != 0) + rte_panic("Cannot create thread\n"); + + /* Set thread_name for aid in debugging. */ + snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, + "lcore-slave-%d", i); + pthread_set_name_np(lcore_config[i].thread_id, thread_name); + } + + /* + * Launch a dummy function on all slave lcores, so that master lcore + * knows they are all ready when this function returns. + */ + rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER); + rte_eal_mp_wait_lcore(); + + /* Probe & Initialize PCI devices */ + if (rte_eal_pci_probe()) + rte_panic("Cannot probe PCI\n"); + + rte_eal_mcfg_complete(); + + return fctret; +} + +/* get core role */ +enum rte_lcore_role_t +rte_eal_lcore_role(unsigned lcore_id) +{ + return rte_config.lcore_role[lcore_id]; +} + +enum rte_proc_type_t +rte_eal_process_type(void) +{ + return rte_config.process_type; +} diff --git a/lib/librte_eal/bsdapp/eal/eal_alarm.c b/lib/librte_eal/bsdapp/eal/eal_alarm.c new file mode 100644 index 00000000..204df85d --- /dev/null +++ b/lib/librte_eal/bsdapp/eal/eal_alarm.c @@ -0,0 +1,60 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include + +#include +#include +#include "eal_private.h" + +int +rte_eal_alarm_init(void) +{ + return 0; +} + + +int +rte_eal_alarm_set(uint64_t us __rte_unused, + rte_eal_alarm_callback cb_fn __rte_unused, + void *cb_arg __rte_unused) +{ + return -ENOTSUP; +} + +int +rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn __rte_unused, + void *cb_arg __rte_unused) +{ + return -ENOTSUP; +} diff --git a/lib/librte_eal/bsdapp/eal/eal_debug.c b/lib/librte_eal/bsdapp/eal/eal_debug.c new file mode 100644 index 00000000..907fbfa7 --- /dev/null +++ b/lib/librte_eal/bsdapp/eal/eal_debug.c @@ -0,0 +1,119 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define BACKTRACE_SIZE 256 + +/* dump the stack of the calling core */ +void rte_dump_stack(void) +{ + void *func[BACKTRACE_SIZE]; + char **symb = NULL; + int size; + + size = backtrace(func, BACKTRACE_SIZE); + symb = backtrace_symbols(func, size); + + if (symb == NULL) + return; + + while (size > 0) { + rte_log(RTE_LOG_ERR, RTE_LOGTYPE_EAL, + "%d: [%s]\n", size, symb[size - 1]); + size --; + } + + free(symb); +} + +/* not implemented in this environment */ +void rte_dump_registers(void) +{ + return; +} + +/* call abort(), it will generate a coredump if enabled */ +void __rte_panic(const char *funcname, const char *format, ...) +{ + va_list ap; + + /* disable history */ + rte_log_set_history(0); + + rte_log(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, "PANIC in %s():\n", funcname); + va_start(ap, format); + rte_vlog(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, format, ap); + va_end(ap); + rte_dump_stack(); + rte_dump_registers(); + abort(); +} + +/* + * Like rte_panic this terminates the application. However, no traceback is + * provided and no core-dump is generated. + */ +void +rte_exit(int exit_code, const char *format, ...) +{ + va_list ap; + + /* disable history */ + rte_log_set_history(0); + + if (exit_code != 0) + RTE_LOG(CRIT, EAL, "Error - exiting with code: %d\n" + " Cause: ", exit_code); + + va_start(ap, format); + rte_vlog(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, format, ap); + va_end(ap); + +#ifndef RTE_EAL_ALWAYS_PANIC_ON_ERROR + exit(exit_code); +#else + rte_dump_stack(); + rte_dump_registers(); + abort(); +#endif +} diff --git a/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c b/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c new file mode 100644 index 00000000..8a33c30c --- /dev/null +++ b/lib/librte_eal/bsdapp/eal/eal_hugepage_info.c @@ -0,0 +1,134 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include +#include +#include + +#include +#include +#include "eal_hugepages.h" +#include "eal_internal_cfg.h" +#include "eal_filesystem.h" + +#define CONTIGMEM_DEV "/dev/contigmem" + +/* + * Uses mmap to create a shared memory area for storage of data + * Used in this file to store the hugepage file map on disk + */ +static void * +create_shared_memory(const char *filename, const size_t mem_size) +{ + void *retval; + int fd = open(filename, O_CREAT | O_RDWR, 0666); + if (fd < 0) + return NULL; + if (ftruncate(fd, mem_size) < 0) { + close(fd); + return NULL; + } + retval = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + close(fd); + return retval; +} + +/* + * No hugepage support on freebsd, but we dummy it, using contigmem driver + */ +int +eal_hugepage_info_init(void) +{ + size_t sysctl_size; + int num_buffers, fd, error; + int64_t buffer_size; + /* re-use the linux "internal config" structure for our memory data */ + struct hugepage_info *hpi = &internal_config.hugepage_info[0]; + struct hugepage_info *tmp_hpi; + + sysctl_size = sizeof(num_buffers); + error = sysctlbyname("hw.contigmem.num_buffers", &num_buffers, + &sysctl_size, NULL, 0); + + if (error != 0) { + RTE_LOG(ERR, EAL, "could not read sysctl hw.contigmem.num_buffers"); + return -1; + } + + sysctl_size = sizeof(buffer_size); + error = sysctlbyname("hw.contigmem.buffer_size", &buffer_size, + &sysctl_size, NULL, 0); + + if (error != 0) { + RTE_LOG(ERR, EAL, "could not read sysctl hw.contigmem.buffer_size"); + return -1; + } + + fd = open(CONTIGMEM_DEV, O_RDWR); + if (fd < 0) { + RTE_LOG(ERR, EAL, "could not open "CONTIGMEM_DEV"\n"); + return -1; + } + + if (buffer_size >= 1<<30) + RTE_LOG(INFO, EAL, "Contigmem driver has %d buffers, each of size %dGB\n", + num_buffers, (int)(buffer_size>>30)); + else if (buffer_size >= 1<<20) + RTE_LOG(INFO, EAL, "Contigmem driver has %d buffers, each of size %dMB\n", + num_buffers, (int)(buffer_size>>20)); + else + RTE_LOG(INFO, EAL, "Contigmem driver has %d buffers, each of size %dKB\n", + num_buffers, (int)(buffer_size>>10)); + + internal_config.num_hugepage_sizes = 1; + hpi->hugedir = CONTIGMEM_DEV; + hpi->hugepage_sz = buffer_size; + hpi->num_pages[0] = num_buffers; + hpi->lock_descriptor = fd; + + tmp_hpi = create_shared_memory(eal_hugepage_info_path(), + sizeof(struct hugepage_info)); + if (tmp_hpi == NULL ) { + RTE_LOG(ERR, EAL, "Failed to create shared memory!\n"); + return -1; + } + + memcpy(tmp_hpi, hpi, sizeof(struct hugepage_info)); + + if ( munmap(tmp_hpi, sizeof(struct hugepage_info)) < 0) { + RTE_LOG(ERR, EAL, "Failed to unmap shared memory!\n"); + return -1; + } + + return 0; +} diff --git a/lib/librte_eal/bsdapp/eal/eal_interrupts.c b/lib/librte_eal/bsdapp/eal/eal_interrupts.c new file mode 100644 index 00000000..836e4836 --- /dev/null +++ b/lib/librte_eal/bsdapp/eal/eal_interrupts.c @@ -0,0 +1,119 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include "eal_private.h" + +int +rte_intr_callback_register(struct rte_intr_handle *intr_handle __rte_unused, + rte_intr_callback_fn cb __rte_unused, + void *cb_arg __rte_unused) +{ + return -ENOTSUP; +} + +int +rte_intr_callback_unregister(struct rte_intr_handle *intr_handle __rte_unused, + rte_intr_callback_fn cb_fn __rte_unused, + void *cb_arg __rte_unused) +{ + return -ENOTSUP; +} + +int +rte_intr_enable(struct rte_intr_handle *intr_handle __rte_unused) +{ + return -ENOTSUP; +} + +int +rte_intr_disable(struct rte_intr_handle *intr_handle __rte_unused) +{ + return -ENOTSUP; +} + +int +rte_eal_intr_init(void) +{ + return 0; +} + +int +rte_intr_rx_ctl(struct rte_intr_handle *intr_handle, + int epfd, int op, unsigned int vec, void *data) +{ + RTE_SET_USED(intr_handle); + RTE_SET_USED(epfd); + RTE_SET_USED(op); + RTE_SET_USED(vec); + RTE_SET_USED(data); + + return -ENOTSUP; +} + +int +rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd) +{ + RTE_SET_USED(intr_handle); + RTE_SET_USED(nb_efd); + + return 0; +} + +void +rte_intr_efd_disable(struct rte_intr_handle *intr_handle) +{ + RTE_SET_USED(intr_handle); +} + +int +rte_intr_dp_is_en(struct rte_intr_handle *intr_handle) +{ + RTE_SET_USED(intr_handle); + return 0; +} + +int +rte_intr_allow_others(struct rte_intr_handle *intr_handle) +{ + RTE_SET_USED(intr_handle); + return 1; +} + +int +rte_intr_cap_multiple(struct rte_intr_handle *intr_handle) +{ + RTE_SET_USED(intr_handle); + return 0; +} diff --git a/lib/librte_eal/bsdapp/eal/eal_lcore.c b/lib/librte_eal/bsdapp/eal/eal_lcore.c new file mode 100644 index 00000000..b8bfafde --- /dev/null +++ b/lib/librte_eal/bsdapp/eal/eal_lcore.c @@ -0,0 +1,79 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include +#include +#include +#include +#include + +#include "eal_private.h" +#include "eal_thread.h" + +/* No topology information available on FreeBSD including NUMA info */ +unsigned +eal_cpu_core_id(__rte_unused unsigned lcore_id) +{ + return 0; +} + +static int +eal_get_ncpus(void) +{ + int mib[2] = {CTL_HW, HW_NCPU}; + int ncpu; + size_t len = sizeof(ncpu); + + sysctl(mib, 2, &ncpu, &len, NULL, 0); + RTE_LOG(INFO, EAL, "Sysctl reports %d cpus\n", ncpu); + return ncpu; +} + +unsigned +eal_cpu_socket_id(__rte_unused unsigned cpu_id) +{ + return 0; +} + +/* Check if a cpu is present by the presence of the + * cpu information for it. + */ +int +eal_cpu_detected(unsigned lcore_id) +{ + const unsigned ncpus = eal_get_ncpus(); + return lcore_id < ncpus; +} diff --git a/lib/librte_eal/bsdapp/eal/eal_log.c b/lib/librte_eal/bsdapp/eal/eal_log.c new file mode 100644 index 00000000..a425f7a8 --- /dev/null +++ b/lib/librte_eal/bsdapp/eal/eal_log.c @@ -0,0 +1,57 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include + +#include + +/* + * set the log to default function, called during eal init process, + * once memzones are available. + */ +int +rte_eal_log_init(const char *id __rte_unused, int facility __rte_unused) +{ + if (rte_eal_common_log_init(stderr) < 0) + return -1; + return 0; +} + +int +rte_eal_log_early_init(void) +{ + rte_openlog_stream(stderr); + return 0; +} diff --git a/lib/librte_eal/bsdapp/eal/eal_memory.c b/lib/librte_eal/bsdapp/eal/eal_memory.c new file mode 100644 index 00000000..3614da8d --- /dev/null +++ b/lib/librte_eal/bsdapp/eal/eal_memory.c @@ -0,0 +1,194 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include "eal_private.h" +#include "eal_internal_cfg.h" +#include "eal_filesystem.h" + +#define EAL_PAGE_SIZE (sysconf(_SC_PAGESIZE)) + +/* + * Get physical address of any mapped virtual address in the current process. + */ +phys_addr_t +rte_mem_virt2phy(const void *virtaddr) +{ + /* XXX not implemented. This function is only used by + * rte_mempool_virt2phy() when hugepages are disabled. */ + (void)virtaddr; + return RTE_BAD_PHYS_ADDR; +} + +int +rte_eal_hugepage_init(void) +{ + struct rte_mem_config *mcfg; + uint64_t total_mem = 0; + void *addr; + unsigned i, j, seg_idx = 0; + + /* get pointer to global configuration */ + mcfg = rte_eal_get_configuration()->mem_config; + + /* for debug purposes, hugetlbfs can be disabled */ + if (internal_config.no_hugetlbfs) { + addr = malloc(internal_config.memory); + mcfg->memseg[0].phys_addr = (phys_addr_t)(uintptr_t)addr; + mcfg->memseg[0].addr = addr; + mcfg->memseg[0].hugepage_sz = RTE_PGSIZE_4K; + mcfg->memseg[0].len = internal_config.memory; + mcfg->memseg[0].socket_id = 0; + return 0; + } + + /* map all hugepages and sort them */ + for (i = 0; i < internal_config.num_hugepage_sizes; i ++){ + struct hugepage_info *hpi; + + hpi = &internal_config.hugepage_info[i]; + for (j = 0; j < hpi->num_pages[0]; j++) { + struct rte_memseg *seg; + uint64_t physaddr; + int error; + size_t sysctl_size = sizeof(physaddr); + char physaddr_str[64]; + + addr = mmap(NULL, hpi->hugepage_sz, PROT_READ|PROT_WRITE, + MAP_SHARED, hpi->lock_descriptor, + j * EAL_PAGE_SIZE); + if (addr == MAP_FAILED) { + RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n", + j, hpi->hugedir); + return -1; + } + + snprintf(physaddr_str, sizeof(physaddr_str), "hw.contigmem" + ".physaddr.%d", j); + error = sysctlbyname(physaddr_str, &physaddr, &sysctl_size, + NULL, 0); + if (error < 0) { + RTE_LOG(ERR, EAL, "Failed to get physical addr for buffer %u " + "from %s\n", j, hpi->hugedir); + return -1; + } + + seg = &mcfg->memseg[seg_idx++]; + seg->addr = addr; + seg->phys_addr = physaddr; + seg->hugepage_sz = hpi->hugepage_sz; + seg->len = hpi->hugepage_sz; + seg->nchannel = mcfg->nchannel; + seg->nrank = mcfg->nrank; + seg->socket_id = 0; + + RTE_LOG(INFO, EAL, "Mapped memory segment %u @ %p: physaddr:0x%" + PRIx64", len %zu\n", + seg_idx, addr, physaddr, hpi->hugepage_sz); + if (total_mem >= internal_config.memory || + seg_idx >= RTE_MAX_MEMSEG) + break; + } + } + return 0; +} + +int +rte_eal_hugepage_attach(void) +{ + const struct hugepage_info *hpi; + int fd_hugepage_info, fd_hugepage = -1; + unsigned i = 0; + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + + /* Obtain a file descriptor for hugepage_info */ + fd_hugepage_info = open(eal_hugepage_info_path(), O_RDONLY); + if (fd_hugepage_info < 0) { + RTE_LOG(ERR, EAL, "Could not open %s\n", eal_hugepage_info_path()); + return -1; + } + + /* Map the shared hugepage_info into the process address spaces */ + hpi = mmap(NULL, sizeof(struct hugepage_info), PROT_READ, MAP_PRIVATE, + fd_hugepage_info, 0); + if (hpi == NULL) { + RTE_LOG(ERR, EAL, "Could not mmap %s\n", eal_hugepage_info_path()); + goto error; + } + + /* Obtain a file descriptor for contiguous memory */ + fd_hugepage = open(hpi->hugedir, O_RDWR); + if (fd_hugepage < 0) { + RTE_LOG(ERR, EAL, "Could not open %s\n", hpi->hugedir); + goto error; + } + + /* Map the contiguous memory into each memory segment */ + for (i = 0; i < hpi->num_pages[0]; i++) { + + void *addr; + struct rte_memseg *seg = &mcfg->memseg[i]; + + addr = mmap(seg->addr, hpi->hugepage_sz, PROT_READ|PROT_WRITE, + MAP_SHARED|MAP_FIXED, fd_hugepage, + i * EAL_PAGE_SIZE); + if (addr == MAP_FAILED || addr != seg->addr) { + RTE_LOG(ERR, EAL, "Failed to mmap buffer %u from %s\n", + i, hpi->hugedir); + goto error; + } + + } + + /* hugepage_info is no longer required */ + munmap((void *)(uintptr_t)hpi, sizeof(struct hugepage_info)); + close(fd_hugepage_info); + close(fd_hugepage); + return 0; + +error: + if (fd_hugepage_info >= 0) + close(fd_hugepage_info); + if (fd_hugepage >= 0) + close(fd_hugepage); + return -1; +} diff --git a/lib/librte_eal/bsdapp/eal/eal_pci.c b/lib/librte_eal/bsdapp/eal/eal_pci.c new file mode 100644 index 00000000..2d16d782 --- /dev/null +++ b/lib/librte_eal/bsdapp/eal/eal_pci.c @@ -0,0 +1,633 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined(RTE_ARCH_X86) +#include +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "eal_filesystem.h" +#include "eal_private.h" + +/** + * @file + * PCI probing under linux + * + * This code is used to simulate a PCI probe by parsing information in + * sysfs. Moreover, when a registered driver matches a device, the + * kernel driver currently using it is unloaded and replaced by + * igb_uio module, which is a very minimal userland driver for Intel + * network card, only providing access to PCI BAR to applications, and + * enabling bus master. + */ + +/* unbind kernel driver for this device */ +int +pci_unbind_kernel_driver(struct rte_pci_device *dev __rte_unused) +{ + RTE_LOG(ERR, EAL, "RTE_PCI_DRV_FORCE_UNBIND flag is not implemented " + "for BSD\n"); + return -ENOTSUP; +} + +/* Map pci device */ +int +rte_eal_pci_map_device(struct rte_pci_device *dev) +{ + int ret = -1; + + /* try mapping the NIC resources */ + switch (dev->kdrv) { + case RTE_KDRV_NIC_UIO: + /* map resources for devices that use uio */ + ret = pci_uio_map_resource(dev); + break; + default: + RTE_LOG(DEBUG, EAL, + " Not managed by a supported kernel driver, skipped\n"); + ret = 1; + break; + } + + return ret; +} + +/* Unmap pci device */ +void +rte_eal_pci_unmap_device(struct rte_pci_device *dev) +{ + /* try unmapping the NIC resources */ + switch (dev->kdrv) { + case RTE_KDRV_NIC_UIO: + /* unmap resources for devices that use uio */ + pci_uio_unmap_resource(dev); + break; + default: + RTE_LOG(DEBUG, EAL, + " Not managed by a supported kernel driver, skipped\n"); + break; + } +} + +void +pci_uio_free_resource(struct rte_pci_device *dev, + struct mapped_pci_resource *uio_res) +{ + rte_free(uio_res); + + if (dev->intr_handle.fd) { + close(dev->intr_handle.fd); + dev->intr_handle.fd = -1; + dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; + } +} + +int +pci_uio_alloc_resource(struct rte_pci_device *dev, + struct mapped_pci_resource **uio_res) +{ + char devname[PATH_MAX]; /* contains the /dev/uioX */ + struct rte_pci_addr *loc; + + loc = &dev->addr; + + snprintf(devname, sizeof(devname), "/dev/uio@pci:%u:%u:%u", + dev->addr.bus, dev->addr.devid, dev->addr.function); + + if (access(devname, O_RDWR) < 0) { + RTE_LOG(WARNING, EAL, " "PCI_PRI_FMT" not managed by UIO driver, " + "skipping\n", loc->domain, loc->bus, loc->devid, loc->function); + return 1; + } + + /* save fd if in primary process */ + dev->intr_handle.fd = open(devname, O_RDWR); + if (dev->intr_handle.fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", + devname, strerror(errno)); + goto error; + } + dev->intr_handle.type = RTE_INTR_HANDLE_UIO; + + /* allocate the mapping details for secondary processes*/ + *uio_res = rte_zmalloc("UIO_RES", sizeof(**uio_res), 0); + if (*uio_res == NULL) { + RTE_LOG(ERR, EAL, + "%s(): cannot store uio mmap details\n", __func__); + goto error; + } + + snprintf((*uio_res)->path, sizeof((*uio_res)->path), "%s", devname); + memcpy(&(*uio_res)->pci_addr, &dev->addr, sizeof((*uio_res)->pci_addr)); + + return 0; + +error: + pci_uio_free_resource(dev, *uio_res); + return -1; +} + +int +pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx, + struct mapped_pci_resource *uio_res, int map_idx) +{ + int fd; + char *devname; + void *mapaddr; + uint64_t offset; + uint64_t pagesz; + struct pci_map *maps; + + maps = uio_res->maps; + devname = uio_res->path; + pagesz = sysconf(_SC_PAGESIZE); + + /* allocate memory to keep path */ + maps[map_idx].path = rte_malloc(NULL, strlen(devname) + 1, 0); + if (maps[map_idx].path == NULL) { + RTE_LOG(ERR, EAL, "Cannot allocate memory for path: %s\n", + strerror(errno)); + return -1; + } + + /* + * open resource file, to mmap it + */ + fd = open(devname, O_RDWR); + if (fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", + devname, strerror(errno)); + goto error; + } + + /* if matching map is found, then use it */ + offset = res_idx * pagesz; + mapaddr = pci_map_resource(NULL, fd, (off_t)offset, + (size_t)dev->mem_resource[res_idx].len, 0); + close(fd); + if (mapaddr == MAP_FAILED) + goto error; + + maps[map_idx].phaddr = dev->mem_resource[res_idx].phys_addr; + maps[map_idx].size = dev->mem_resource[res_idx].len; + maps[map_idx].addr = mapaddr; + maps[map_idx].offset = offset; + strcpy(maps[map_idx].path, devname); + dev->mem_resource[res_idx].addr = mapaddr; + + return 0; + +error: + rte_free(maps[map_idx].path); + return -1; +} + +static int +pci_scan_one(int dev_pci_fd, struct pci_conf *conf) +{ + struct rte_pci_device *dev; + struct pci_bar_io bar; + unsigned i, max; + + dev = malloc(sizeof(*dev)); + if (dev == NULL) { + return -1; + } + + memset(dev, 0, sizeof(*dev)); + dev->addr.domain = conf->pc_sel.pc_domain; + dev->addr.bus = conf->pc_sel.pc_bus; + dev->addr.devid = conf->pc_sel.pc_dev; + dev->addr.function = conf->pc_sel.pc_func; + + /* get vendor id */ + dev->id.vendor_id = conf->pc_vendor; + + /* get device id */ + dev->id.device_id = conf->pc_device; + + /* get subsystem_vendor id */ + dev->id.subsystem_vendor_id = conf->pc_subvendor; + + /* get subsystem_device id */ + dev->id.subsystem_device_id = conf->pc_subdevice; + + /* TODO: get max_vfs */ + dev->max_vfs = 0; + + /* FreeBSD has no NUMA support (yet) */ + dev->numa_node = 0; + + /* FreeBSD has only one pass through driver */ + dev->kdrv = RTE_KDRV_NIC_UIO; + + /* parse resources */ + switch (conf->pc_hdr & PCIM_HDRTYPE) { + case PCIM_HDRTYPE_NORMAL: + max = PCIR_MAX_BAR_0; + break; + case PCIM_HDRTYPE_BRIDGE: + max = PCIR_MAX_BAR_1; + break; + case PCIM_HDRTYPE_CARDBUS: + max = PCIR_MAX_BAR_2; + break; + default: + goto skipdev; + } + + for (i = 0; i <= max; i++) { + bar.pbi_sel = conf->pc_sel; + bar.pbi_reg = PCIR_BAR(i); + if (ioctl(dev_pci_fd, PCIOCGETBAR, &bar) < 0) + continue; + + dev->mem_resource[i].len = bar.pbi_length; + if (PCI_BAR_IO(bar.pbi_base)) { + dev->mem_resource[i].addr = (void *)(bar.pbi_base & ~((uint64_t)0xf)); + continue; + } + dev->mem_resource[i].phys_addr = bar.pbi_base & ~((uint64_t)0xf); + } + + /* device is valid, add in list (sorted) */ + if (TAILQ_EMPTY(&pci_device_list)) { + TAILQ_INSERT_TAIL(&pci_device_list, dev, next); + } + else { + struct rte_pci_device *dev2 = NULL; + int ret; + + TAILQ_FOREACH(dev2, &pci_device_list, next) { + ret = rte_eal_compare_pci_addr(&dev->addr, &dev2->addr); + if (ret > 0) + continue; + else if (ret < 0) { + TAILQ_INSERT_BEFORE(dev2, dev, next); + return 0; + } else { /* already registered */ + dev2->kdrv = dev->kdrv; + dev2->max_vfs = dev->max_vfs; + memmove(dev2->mem_resource, + dev->mem_resource, + sizeof(dev->mem_resource)); + free(dev); + return 0; + } + } + TAILQ_INSERT_TAIL(&pci_device_list, dev, next); + } + + return 0; + +skipdev: + free(dev); + return 0; +} + +/* + * Scan the content of the PCI bus, and add the devices in the devices + * list. Call pci_scan_one() for each pci entry found. + */ +int +rte_eal_pci_scan(void) +{ + int fd; + unsigned dev_count = 0; + struct pci_conf matches[16]; + struct pci_conf_io conf_io = { + .pat_buf_len = 0, + .num_patterns = 0, + .patterns = NULL, + .match_buf_len = sizeof(matches), + .matches = &matches[0], + }; + + fd = open("/dev/pci", O_RDONLY); + if (fd < 0) { + RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__); + goto error; + } + + do { + unsigned i; + if (ioctl(fd, PCIOCGETCONF, &conf_io) < 0) { + RTE_LOG(ERR, EAL, "%s(): error with ioctl on /dev/pci: %s\n", + __func__, strerror(errno)); + goto error; + } + + for (i = 0; i < conf_io.num_matches; i++) + if (pci_scan_one(fd, &matches[i]) < 0) + goto error; + + dev_count += conf_io.num_matches; + } while(conf_io.status == PCI_GETCONF_MORE_DEVS); + + close(fd); + + RTE_LOG(ERR, EAL, "PCI scan found %u devices\n", dev_count); + return 0; + +error: + if (fd >= 0) + close(fd); + return -1; +} + +/* Read PCI config space. */ +int rte_eal_pci_read_config(const struct rte_pci_device *dev, + void *buf, size_t len, off_t offset) +{ + int fd = -1; + struct pci_io pi = { + .pi_sel = { + .pc_domain = dev->addr.domain, + .pc_bus = dev->addr.bus, + .pc_dev = dev->addr.devid, + .pc_func = dev->addr.function, + }, + .pi_reg = offset, + .pi_width = len, + }; + + if (len == 3 || len > sizeof(pi.pi_data)) { + RTE_LOG(ERR, EAL, "%s(): invalid pci read length\n", __func__); + goto error; + } + + fd = open("/dev/pci", O_RDONLY); + if (fd < 0) { + RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__); + goto error; + } + + if (ioctl(fd, PCIOCREAD, &pi) < 0) + goto error; + close(fd); + + memcpy(buf, &pi.pi_data, len); + return 0; + + error: + if (fd >= 0) + close(fd); + return -1; +} + +/* Write PCI config space. */ +int rte_eal_pci_write_config(const struct rte_pci_device *dev, + const void *buf, size_t len, off_t offset) +{ + int fd = -1; + + struct pci_io pi = { + .pi_sel = { + .pc_domain = dev->addr.domain, + .pc_bus = dev->addr.bus, + .pc_dev = dev->addr.devid, + .pc_func = dev->addr.function, + }, + .pi_reg = offset, + .pi_data = *(const uint32_t *)buf, + .pi_width = len, + }; + + if (len == 3 || len > sizeof(pi.pi_data)) { + RTE_LOG(ERR, EAL, "%s(): invalid pci read length\n", __func__); + goto error; + } + + memcpy(&pi.pi_data, buf, len); + + fd = open("/dev/pci", O_RDONLY); + if (fd < 0) { + RTE_LOG(ERR, EAL, "%s(): error opening /dev/pci\n", __func__); + goto error; + } + + if (ioctl(fd, PCIOCWRITE, &pi) < 0) + goto error; + + close(fd); + return 0; + + error: + if (fd >= 0) + close(fd); + return -1; +} + +int +rte_eal_pci_ioport_map(struct rte_pci_device *dev, int bar, + struct rte_pci_ioport *p) +{ + int ret; + + switch (dev->kdrv) { +#if defined(RTE_ARCH_X86) + case RTE_KDRV_NIC_UIO: + if ((uintptr_t) dev->mem_resource[bar].addr <= UINT16_MAX) { + p->base = (uintptr_t)dev->mem_resource[bar].addr; + ret = 0; + } else + ret = -1; + break; +#endif + default: + ret = -1; + break; + } + + if (!ret) + p->dev = dev; + + return ret; +} + +static void +pci_uio_ioport_read(struct rte_pci_ioport *p, + void *data, size_t len, off_t offset) +{ +#if defined(RTE_ARCH_X86) + uint8_t *d; + int size; + unsigned short reg = p->base + offset; + + for (d = data; len > 0; d += size, reg += size, len -= size) { + if (len >= 4) { + size = 4; + *(uint32_t *)d = inl(reg); + } else if (len >= 2) { + size = 2; + *(uint16_t *)d = inw(reg); + } else { + size = 1; + *d = inb(reg); + } + } +#else + RTE_SET_USED(p); + RTE_SET_USED(data); + RTE_SET_USED(len); + RTE_SET_USED(offset); +#endif +} + +void +rte_eal_pci_ioport_read(struct rte_pci_ioport *p, + void *data, size_t len, off_t offset) +{ + switch (p->dev->kdrv) { + case RTE_KDRV_NIC_UIO: + pci_uio_ioport_read(p, data, len, offset); + break; + default: + break; + } +} + +static void +pci_uio_ioport_write(struct rte_pci_ioport *p, + const void *data, size_t len, off_t offset) +{ +#if defined(RTE_ARCH_X86) + const uint8_t *s; + int size; + unsigned short reg = p->base + offset; + + for (s = data; len > 0; s += size, reg += size, len -= size) { + if (len >= 4) { + size = 4; + outl(*(const uint32_t *)s, reg); + } else if (len >= 2) { + size = 2; + outw(*(const uint16_t *)s, reg); + } else { + size = 1; + outb(*s, reg); + } + } +#else + RTE_SET_USED(p); + RTE_SET_USED(data); + RTE_SET_USED(len); + RTE_SET_USED(offset); +#endif +} + +void +rte_eal_pci_ioport_write(struct rte_pci_ioport *p, + const void *data, size_t len, off_t offset) +{ + switch (p->dev->kdrv) { + case RTE_KDRV_NIC_UIO: + pci_uio_ioport_write(p, data, len, offset); + break; + default: + break; + } +} + +int +rte_eal_pci_ioport_unmap(struct rte_pci_ioport *p) +{ + int ret; + + switch (p->dev->kdrv) { +#if defined(RTE_ARCH_X86) + case RTE_KDRV_NIC_UIO: + ret = 0; + break; +#endif + default: + ret = -1; + break; + } + + return ret; +} + +/* Init the PCI EAL subsystem */ +int +rte_eal_pci_init(void) +{ + TAILQ_INIT(&pci_driver_list); + TAILQ_INIT(&pci_device_list); + + /* for debug purposes, PCI can be disabled */ + if (internal_config.no_pci) + return 0; + + if (rte_eal_pci_scan() < 0) { + RTE_LOG(ERR, EAL, "%s(): Cannot scan PCI bus\n", __func__); + return -1; + } + return 0; +} diff --git a/lib/librte_eal/bsdapp/eal/eal_thread.c b/lib/librte_eal/bsdapp/eal/eal_thread.c new file mode 100644 index 00000000..9a034373 --- /dev/null +++ b/lib/librte_eal/bsdapp/eal/eal_thread.c @@ -0,0 +1,201 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "eal_private.h" +#include "eal_thread.h" + +RTE_DEFINE_PER_LCORE(unsigned, _lcore_id) = LCORE_ID_ANY; +RTE_DEFINE_PER_LCORE(unsigned, _socket_id) = (unsigned)SOCKET_ID_ANY; +RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset); + +/* + * Send a message to a slave lcore identified by slave_id to call a + * function f with argument arg. Once the execution is done, the + * remote lcore switch in FINISHED state. + */ +int +rte_eal_remote_launch(int (*f)(void *), void *arg, unsigned slave_id) +{ + int n; + char c = 0; + int m2s = lcore_config[slave_id].pipe_master2slave[1]; + int s2m = lcore_config[slave_id].pipe_slave2master[0]; + + if (lcore_config[slave_id].state != WAIT) + return -EBUSY; + + lcore_config[slave_id].f = f; + lcore_config[slave_id].arg = arg; + + /* send message */ + n = 0; + while (n == 0 || (n < 0 && errno == EINTR)) + n = write(m2s, &c, 1); + if (n < 0) + rte_panic("cannot write on configuration pipe\n"); + + /* wait ack */ + do { + n = read(s2m, &c, 1); + } while (n < 0 && errno == EINTR); + + if (n <= 0) + rte_panic("cannot read on configuration pipe\n"); + + return 0; +} + +/* set affinity for current thread */ +static int +eal_thread_set_affinity(void) +{ + unsigned lcore_id = rte_lcore_id(); + + /* acquire system unique id */ + rte_gettid(); + + /* update EAL thread core affinity */ + return rte_thread_set_affinity(&lcore_config[lcore_id].cpuset); +} + +void eal_thread_init_master(unsigned lcore_id) +{ + /* set the lcore ID in per-lcore memory area */ + RTE_PER_LCORE(_lcore_id) = lcore_id; + + /* set CPU affinity */ + if (eal_thread_set_affinity() < 0) + rte_panic("cannot set affinity\n"); +} + +/* main loop of threads */ +__attribute__((noreturn)) void * +eal_thread_loop(__attribute__((unused)) void *arg) +{ + char c; + int n, ret; + unsigned lcore_id; + pthread_t thread_id; + int m2s, s2m; + char cpuset[RTE_CPU_AFFINITY_STR_LEN]; + + thread_id = pthread_self(); + + /* retrieve our lcore_id from the configuration structure */ + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + if (thread_id == lcore_config[lcore_id].thread_id) + break; + } + if (lcore_id == RTE_MAX_LCORE) + rte_panic("cannot retrieve lcore id\n"); + + m2s = lcore_config[lcore_id].pipe_master2slave[0]; + s2m = lcore_config[lcore_id].pipe_slave2master[1]; + + /* set the lcore ID in per-lcore memory area */ + RTE_PER_LCORE(_lcore_id) = lcore_id; + + /* set CPU affinity */ + if (eal_thread_set_affinity() < 0) + rte_panic("cannot set affinity\n"); + + ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN); + + RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%p;cpuset=[%s%s])\n", + lcore_id, thread_id, cpuset, ret == 0 ? "" : "..."); + + /* read on our pipe to get commands */ + while (1) { + void *fct_arg; + + /* wait command */ + do { + n = read(m2s, &c, 1); + } while (n < 0 && errno == EINTR); + + if (n <= 0) + rte_panic("cannot read on configuration pipe\n"); + + lcore_config[lcore_id].state = RUNNING; + + /* send ack */ + n = 0; + while (n == 0 || (n < 0 && errno == EINTR)) + n = write(s2m, &c, 1); + if (n < 0) + rte_panic("cannot write on configuration pipe\n"); + + if (lcore_config[lcore_id].f == NULL) + rte_panic("NULL function pointer\n"); + + /* call the function and store the return value */ + fct_arg = lcore_config[lcore_id].arg; + ret = lcore_config[lcore_id].f(fct_arg); + lcore_config[lcore_id].ret = ret; + rte_wmb(); + lcore_config[lcore_id].state = FINISHED; + } + + /* never reached */ + /* pthread_exit(NULL); */ + /* return NULL; */ +} + +/* require calling thread tid by gettid() */ +int rte_sys_gettid(void) +{ + long lwpid; + thr_self(&lwpid); + return (int)lwpid; +} diff --git a/lib/librte_eal/bsdapp/eal/eal_timer.c b/lib/librte_eal/bsdapp/eal/eal_timer.c new file mode 100644 index 00000000..f12d9bd2 --- /dev/null +++ b/lib/librte_eal/bsdapp/eal/eal_timer.c @@ -0,0 +1,94 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include "eal_private.h" +#include "eal_internal_cfg.h" + +#ifdef RTE_LIBEAL_USE_HPET +#warning HPET is not supported in FreeBSD +#endif + +enum timer_source eal_timer_source = EAL_TIMER_TSC; + +uint64_t +get_tsc_freq(void) +{ + size_t sz; + int tmp; + uint64_t tsc_hz; + + sz = sizeof(tmp); + tmp = 0; + + if (sysctlbyname("kern.timecounter.smp_tsc", &tmp, &sz, NULL, 0)) + RTE_LOG(WARNING, EAL, "%s\n", strerror(errno)); + else if (tmp != 1) + RTE_LOG(WARNING, EAL, "TSC is not safe to use in SMP mode\n"); + + tmp = 0; + + if (sysctlbyname("kern.timecounter.invariant_tsc", &tmp, &sz, NULL, 0)) + RTE_LOG(WARNING, EAL, "%s\n", strerror(errno)); + else if (tmp != 1) + RTE_LOG(WARNING, EAL, "TSC is not invariant\n"); + + sz = sizeof(tsc_hz); + if (sysctlbyname("machdep.tsc_freq", &tsc_hz, &sz, NULL, 0)) { + RTE_LOG(WARNING, EAL, "%s\n", strerror(errno)); + return 0; + } + + return tsc_hz; +} + +int +rte_eal_timer_init(void) +{ + set_tsc_freq(); + return 0; +} diff --git a/lib/librte_eal/bsdapp/eal/include/exec-env/rte_dom0_common.h b/lib/librte_eal/bsdapp/eal/include/exec-env/rte_dom0_common.h new file mode 100644 index 00000000..99a33432 --- /dev/null +++ b/lib/librte_eal/bsdapp/eal/include/exec-env/rte_dom0_common.h @@ -0,0 +1,107 @@ +/*- + * This file is provided under a dual BSD/LGPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GNU LESSER GENERAL PUBLIC LICENSE + * + * Copyright(c) 2007-2014 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + * Contact Information: + * Intel Corporation + * + * + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef _RTE_DOM0_COMMON_H_ +#define _RTE_DOM0_COMMON_H_ + +#ifdef __KERNEL__ +#include +#endif + +#define DOM0_NAME_MAX 256 +#define DOM0_MM_DEV "/dev/dom0_mm" + +#define DOM0_CONTIG_NUM_ORDER 9 /**< 2M order */ +#define DOM0_NUM_MEMSEG 512 /**< Maximum nb. of memory segment. */ +#define DOM0_MEMBLOCK_SIZE 0x200000 /**< Maximum nb. of memory block(2M). */ +#define DOM0_CONFIG_MEMSIZE 4096 /**< Maximum config memory size(4G). */ +#define DOM0_NUM_MEMBLOCK (DOM0_CONFIG_MEMSIZE / 2) /**< Maximum nb. of 2M memory block. */ + +#define RTE_DOM0_IOCTL_PREPARE_MEMSEG _IOWR(0, 1 , struct memory_info) +#define RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG _IOWR(0, 2 , char *) +#define RTE_DOM0_IOCTL_GET_NUM_MEMSEG _IOWR(0, 3, int) +#define RTE_DOM0_IOCTL_GET_MEMSEG_INFO _IOWR(0, 4, void *) + +/** + * A structure used to store memory information. + */ +struct memory_info { + char name[DOM0_NAME_MAX]; + uint64_t size; +}; + +/** + * A structure used to store memory segment information. + */ +struct memseg_info { + uint32_t idx; + uint64_t pfn; + uint64_t size; + uint64_t mfn[DOM0_NUM_MEMBLOCK]; +}; + +/** + * A structure used to store memory block information. + */ +struct memblock_info { + uint8_t exchange_flag; + uint64_t vir_addr; + uint64_t pfn; + uint64_t mfn; +}; +#endif /* _RTE_DOM0_COMMON_H_ */ diff --git a/lib/librte_eal/bsdapp/eal/include/exec-env/rte_interrupts.h b/lib/librte_eal/bsdapp/eal/include/exec-env/rte_interrupts.h new file mode 100644 index 00000000..c1995ee1 --- /dev/null +++ b/lib/librte_eal/bsdapp/eal/include/exec-env/rte_interrupts.h @@ -0,0 +1,137 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_INTERRUPTS_H_ +#error "don't include this file directly, please include generic " +#endif + +#ifndef _RTE_BSDAPP_INTERRUPTS_H_ +#define _RTE_BSDAPP_INTERRUPTS_H_ + +#define RTE_INTR_VEC_ZERO_OFFSET 0 +#define RTE_INTR_VEC_RXTX_OFFSET 1 + +#define RTE_MAX_RXTX_INTR_VEC_ID 32 + +enum rte_intr_handle_type { + RTE_INTR_HANDLE_UNKNOWN = 0, + RTE_INTR_HANDLE_UIO, /**< uio device handle */ + RTE_INTR_HANDLE_ALARM, /**< alarm handle */ + RTE_INTR_HANDLE_MAX +}; + +/** Handle for interrupts. */ +struct rte_intr_handle { + int fd; /**< file descriptor */ + int uio_cfg_fd; /**< UIO config file descriptor */ + enum rte_intr_handle_type type; /**< handle type */ + int max_intr; /**< max interrupt requested */ + uint32_t nb_efd; /**< number of available efds */ + int *intr_vec; /**< intr vector number array */ +}; + +/** + * @param intr_handle + * Pointer to the interrupt handle. + * @param epfd + * Epoll instance fd which the intr vector associated to. + * @param op + * The operation be performed for the vector. + * Operation type of {ADD, DEL}. + * @param vec + * RX intr vector number added to the epoll instance wait list. + * @param data + * User raw data. + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int +rte_intr_rx_ctl(struct rte_intr_handle *intr_handle, + int epfd, int op, unsigned int vec, void *data); + +/** + * It enables the fastpath event fds if it's necessary. + * It creates event fds when multi-vectors allowed, + * otherwise it multiplexes the single event fds. + * + * @param intr_handle + * Pointer to the interrupt handle. + * @param nb_efd + * Number of interrupt vector trying to enable. + * The value 0 is not allowed. + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int +rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd); + +/** + * It disable the fastpath event fds. + * It deletes registered eventfds and closes the open fds. + * + * @param intr_handle + * Pointer to the interrupt handle. + */ +void +rte_intr_efd_disable(struct rte_intr_handle *intr_handle); + +/** + * The fastpath interrupt is enabled or not. + * + * @param intr_handle + * Pointer to the interrupt handle. + */ +int rte_intr_dp_is_en(struct rte_intr_handle *intr_handle); + +/** + * The interrupt handle instance allows other cause or not. + * Other cause stands for none fastpath interrupt. + * + * @param intr_handle + * Pointer to the interrupt handle. + */ +int rte_intr_allow_others(struct rte_intr_handle *intr_handle); + +/** + * The multiple interrupt vector capability of interrupt handle instance. + * It returns zero if no multiple interrupt vector support. + * + * @param intr_handle + * Pointer to the interrupt handle. + */ +int +rte_intr_cap_multiple(struct rte_intr_handle *intr_handle); + +#endif /* _RTE_BSDAPP_INTERRUPTS_H_ */ diff --git a/lib/librte_eal/bsdapp/eal/rte_eal_version.map b/lib/librte_eal/bsdapp/eal/rte_eal_version.map new file mode 100644 index 00000000..58c2951e --- /dev/null +++ b/lib/librte_eal/bsdapp/eal/rte_eal_version.map @@ -0,0 +1,153 @@ +DPDK_2.0 { + global: + + __rte_panic; + devargs_list; + eal_parse_sysfs_value; + eal_timer_source; + lcore_config; + pci_device_list; + pci_driver_list; + per_lcore__lcore_id; + per_lcore__rte_errno; + rte_calloc; + rte_calloc_socket; + rte_cpu_check_supported; + rte_cpu_get_flag_enabled; + rte_cycles_vmware_tsc_map; + rte_delay_us; + rte_dump_physmem_layout; + rte_dump_registers; + rte_dump_stack; + rte_dump_tailq; + rte_eal_alarm_cancel; + rte_eal_alarm_set; + rte_eal_dev_init; + rte_eal_devargs_add; + rte_eal_devargs_dump; + rte_eal_devargs_type_count; + rte_eal_driver_register; + rte_eal_driver_unregister; + rte_eal_get_configuration; + rte_eal_get_lcore_state; + rte_eal_get_physmem_layout; + rte_eal_get_physmem_size; + rte_eal_has_hugepages; + rte_eal_hpet_init; + rte_eal_init; + rte_eal_iopl_init; + rte_eal_lcore_role; + rte_eal_mp_remote_launch; + rte_eal_mp_wait_lcore; + rte_eal_parse_devargs_str; + rte_eal_pci_dump; + rte_eal_pci_probe; + rte_eal_pci_probe_one; + rte_eal_pci_register; + rte_eal_pci_scan; + rte_eal_pci_unregister; + rte_eal_process_type; + rte_eal_remote_launch; + rte_eal_tailq_lookup; + rte_eal_tailq_register; + rte_eal_vdev_init; + rte_eal_vdev_uninit; + rte_eal_wait_lcore; + rte_exit; + rte_free; + rte_get_hpet_cycles; + rte_get_hpet_hz; + rte_get_log_level; + rte_get_log_type; + rte_get_tsc_hz; + rte_hexdump; + rte_intr_callback_register; + rte_intr_callback_unregister; + rte_intr_disable; + rte_intr_enable; + rte_log; + rte_log_add_in_history; + rte_log_cur_msg_loglevel; + rte_log_cur_msg_logtype; + rte_log_dump_history; + rte_log_set_history; + rte_logs; + rte_malloc; + rte_malloc_dump_stats; + rte_malloc_get_socket_stats; + rte_malloc_set_limit; + rte_malloc_socket; + rte_malloc_validate; + rte_malloc_virt2phy; + rte_mem_lock_page; + rte_mem_phy2mch; + rte_mem_virt2phy; + rte_memdump; + rte_memory_get_nchannel; + rte_memory_get_nrank; + rte_memzone_dump; + rte_memzone_lookup; + rte_memzone_reserve; + rte_memzone_reserve_aligned; + rte_memzone_reserve_bounded; + rte_memzone_walk; + rte_openlog_stream; + rte_realloc; + rte_set_application_usage_hook; + rte_set_log_level; + rte_set_log_type; + rte_socket_id; + rte_strerror; + rte_strsplit; + rte_sys_gettid; + rte_thread_get_affinity; + rte_thread_set_affinity; + rte_vlog; + rte_xen_dom0_memory_attach; + rte_xen_dom0_memory_init; + rte_zmalloc; + rte_zmalloc_socket; + + local: *; +}; + +DPDK_2.1 { + global: + + rte_eal_pci_detach; + rte_eal_pci_read_config; + rte_eal_pci_write_config; + rte_intr_allow_others; + rte_intr_dp_is_en; + rte_intr_efd_disable; + rte_intr_efd_enable; + rte_intr_rx_ctl; + rte_memzone_free; + +} DPDK_2.0; + +DPDK_2.2 { + global: + + rte_intr_cap_multiple; + rte_keepalive_create; + rte_keepalive_dispatch_pings; + rte_keepalive_mark_alive; + rte_keepalive_register_core; + rte_xen_dom0_supported; + +} DPDK_2.1; + +DPDK_16.04 { + global: + + rte_cpu_get_flag_name; + rte_eal_pci_ioport_map; + rte_eal_pci_ioport_read; + rte_eal_pci_ioport_unmap; + rte_eal_pci_ioport_write; + rte_eal_pci_map_device; + rte_eal_pci_unmap_device; + rte_eal_primary_proc_alive; + +} DPDK_2.2; diff --git a/lib/librte_eal/bsdapp/nic_uio/BSDmakefile b/lib/librte_eal/bsdapp/nic_uio/BSDmakefile new file mode 100644 index 00000000..5454ed85 --- /dev/null +++ b/lib/librte_eal/bsdapp/nic_uio/BSDmakefile @@ -0,0 +1,36 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +KMOD= nic_uio +SRCS= nic_uio.c device_if.h bus_if.h pci_if.h + +.include diff --git a/lib/librte_eal/bsdapp/nic_uio/Makefile b/lib/librte_eal/bsdapp/nic_uio/Makefile new file mode 100644 index 00000000..89957615 --- /dev/null +++ b/lib/librte_eal/bsdapp/nic_uio/Makefile @@ -0,0 +1,52 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# +# module name and path +# +MODULE = nic_uio + +# +# CFLAGS +# +MODULE_CFLAGS += -I$(SRCDIR) +MODULE_CFLAGS += -I$(RTE_OUTPUT)/include +MODULE_CFLAGS += -Winline -Wall -Werror +MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h + +# +# all source are stored in SRCS-y +# +SRCS-y := nic_uio.c + +include $(RTE_SDK)/mk/rte.bsdmodule.mk diff --git a/lib/librte_eal/bsdapp/nic_uio/nic_uio.c b/lib/librte_eal/bsdapp/nic_uio/nic_uio.c new file mode 100644 index 00000000..99a4975c --- /dev/null +++ b/lib/librte_eal/bsdapp/nic_uio/nic_uio.c @@ -0,0 +1,335 @@ +/* - + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +__FBSDID("$FreeBSD$"); + +#include /* defines used in kernel.h */ +#include +#include /* types used in module initialization */ +#include /* cdevsw struct */ +#include /* structs, prototypes for pci bus stuff and DEVMETHOD */ +#include +#include +#include +#include + +#include +#include /* For pci_get macros! */ +#include /* The softc holds our per-instance data. */ +#include +#include +#include +#include +#include + + +#define MAX_BARS (PCIR_MAX_BAR_0 + 1) + +#define MAX_DETACHED_DEVICES 128 +static device_t detached_devices[MAX_DETACHED_DEVICES] = {}; +static int num_detached = 0; + +struct nic_uio_softc { + device_t dev_t; + struct cdev *my_cdev; + int bar_id[MAX_BARS]; + struct resource *bar_res[MAX_BARS]; + u_long bar_start[MAX_BARS]; + u_long bar_size[MAX_BARS]; +}; + +/* Function prototypes */ +static d_open_t nic_uio_open; +static d_close_t nic_uio_close; +static d_mmap_t nic_uio_mmap; +static d_mmap_single_t nic_uio_mmap_single; +static int nic_uio_probe(device_t dev); +static int nic_uio_attach(device_t dev); +static int nic_uio_detach(device_t dev); +static int nic_uio_shutdown(void); +static int nic_uio_modevent(module_t mod, int type, void *arg); + +static struct cdevsw uio_cdevsw = { + .d_name = "nic_uio", + .d_version = D_VERSION, + .d_open = nic_uio_open, + .d_close = nic_uio_close, + .d_mmap = nic_uio_mmap, + .d_mmap_single = nic_uio_mmap_single, +}; + +static device_method_t nic_uio_methods[] = { + DEVMETHOD(device_probe, nic_uio_probe), + DEVMETHOD(device_attach, nic_uio_attach), + DEVMETHOD(device_detach, nic_uio_detach), + DEVMETHOD_END +}; + +struct device { + int vend; + int dev; +}; + +struct pci_bdf { + uint32_t bus; + uint32_t devid; + uint32_t function; +}; + +static devclass_t nic_uio_devclass; + +DEFINE_CLASS_0(nic_uio, nic_uio_driver, nic_uio_methods, sizeof(struct nic_uio_softc)); +DRIVER_MODULE(nic_uio, pci, nic_uio_driver, nic_uio_devclass, nic_uio_modevent, 0); + +static int +nic_uio_mmap(struct cdev *cdev, vm_ooffset_t offset, vm_paddr_t *paddr, + int prot, vm_memattr_t *memattr) +{ + *paddr = offset; + return 0; +} + +static int +nic_uio_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t size, + struct vm_object **obj, int nprot) +{ + /* + * The BAR index is encoded in the offset. Divide the offset by + * PAGE_SIZE to get the index of the bar requested by the user + * app. + */ + unsigned bar = *offset/PAGE_SIZE; + struct nic_uio_softc *sc = cdev->si_drv1; + + if (bar >= MAX_BARS) + return EINVAL; + + if (sc->bar_res[bar] == NULL) { + sc->bar_id[bar] = PCIR_BAR(bar); + + if (PCI_BAR_IO(pci_read_config(sc->dev_t, sc->bar_id[bar], 4))) + sc->bar_res[bar] = bus_alloc_resource_any(sc->dev_t, SYS_RES_IOPORT, + &sc->bar_id[bar], RF_ACTIVE); + else + sc->bar_res[bar] = bus_alloc_resource_any(sc->dev_t, SYS_RES_MEMORY, + &sc->bar_id[bar], RF_ACTIVE); + } + if (sc->bar_res[bar] == NULL) + return ENXIO; + + sc->bar_start[bar] = rman_get_start(sc->bar_res[bar]); + sc->bar_size[bar] = rman_get_size(sc->bar_res[bar]); + + device_printf(sc->dev_t, "Bar %u @ %lx, size %lx\n", bar, + sc->bar_start[bar], sc->bar_size[bar]); + + *offset = sc->bar_start[bar]; + *obj = vm_pager_allocate(OBJT_DEVICE, cdev, size, nprot, *offset, + curthread->td_ucred); + return 0; +} + + +int +nic_uio_open(struct cdev *dev, int oflags, int devtype, struct thread *td) +{ + return 0; +} + +int +nic_uio_close(struct cdev *dev, int fflag, int devtype, struct thread *td) +{ + return 0; +} + +static int +nic_uio_probe (device_t dev) +{ + int i; + unsigned int bus = pci_get_bus(dev); + unsigned int device = pci_get_slot(dev); + unsigned int function = pci_get_function(dev); + + for (i = 0; i < num_detached; i++) + if (bus == pci_get_bus(detached_devices[i]) && + device == pci_get_slot(detached_devices[i]) && + function == pci_get_function(detached_devices[i])) { + device_set_desc(dev, "DPDK PCI Device"); + return BUS_PROBE_SPECIFIC; + } + + return ENXIO; +} + +static int +nic_uio_attach(device_t dev) +{ + int i; + struct nic_uio_softc *sc; + + sc = device_get_softc(dev); + sc->dev_t = dev; + sc->my_cdev = make_dev(&uio_cdevsw, device_get_unit(dev), + UID_ROOT, GID_WHEEL, 0600, "uio@pci:%u:%u:%u", + pci_get_bus(dev), pci_get_slot(dev), pci_get_function(dev)); + if (sc->my_cdev == NULL) + return ENXIO; + sc->my_cdev->si_drv1 = sc; + + for (i = 0; i < MAX_BARS; i++) + sc->bar_res[i] = NULL; + + pci_enable_busmaster(dev); + + return 0; +} + +static int +nic_uio_detach(device_t dev) +{ + int i; + struct nic_uio_softc *sc; + sc = device_get_softc(dev); + + for (i = 0; i < MAX_BARS; i++) + if (sc->bar_res[i] != NULL) { + + if (PCI_BAR_IO(pci_read_config(dev, sc->bar_id[i], 4))) + bus_release_resource(dev, SYS_RES_IOPORT, sc->bar_id[i], + sc->bar_res[i]); + else + bus_release_resource(dev, SYS_RES_MEMORY, sc->bar_id[i], + sc->bar_res[i]); + } + + if (sc->my_cdev != NULL) + destroy_dev(sc->my_cdev); + return 0; +} + +static void +nic_uio_load(void) +{ + uint32_t bus, device, function; + device_t dev; + char bdf_str[256]; + char *token, *remaining; + + memset(bdf_str, 0, sizeof(bdf_str)); + TUNABLE_STR_FETCH("hw.nic_uio.bdfs", bdf_str, sizeof(bdf_str)); + remaining = bdf_str; + /* + * Users should specify PCI BDFs in the format "b:d:f,b:d:f,b:d:f". + * But the code below does not try differentiate between : and , + * and just blindly uses 3 tokens at a time to construct a + * bus/device/function tuple. + * + * There is no checking on strtol() return values, but this should + * be OK. Worst case is it cannot convert and returns 0. This + * could give us a different BDF than intended, but as long as the + * PCI device/vendor ID does not match it will not matter. + */ + while (1) { + if (remaining == NULL || remaining[0] == '\0') + break; + token = strsep(&remaining, ",:"); + if (token == NULL) + break; + bus = strtol(token, NULL, 10); + token = strsep(&remaining, ",:"); + if (token == NULL) + break; + device = strtol(token, NULL, 10); + token = strsep(&remaining, ",:"); + if (token == NULL) + break; + function = strtol(token, NULL, 10); + + dev = pci_find_bsf(bus, device, function); + if (dev == NULL) + continue; + + if (num_detached < MAX_DETACHED_DEVICES) { + printf("nic_uio_load: detaching and storing dev=%p\n", + dev); + detached_devices[num_detached++] = dev; + } else { + printf("nic_uio_load: reached MAX_DETACHED_DEVICES=%d. dev=%p won't be reattached\n", + MAX_DETACHED_DEVICES, dev); + } + device_detach(dev); + } +} + +static void +nic_uio_unload(void) +{ + int i; + printf("nic_uio_unload: entered...\n"); + + for (i = 0; i < num_detached; i++) { + printf("nic_uio_unload: calling to device_probe_and_attach for dev=%p...\n", + detached_devices[i]); + device_probe_and_attach(detached_devices[i]); + printf("nic_uio_unload: done.\n"); + } + + printf("nic_uio_unload: leaving...\n"); +} + +static int +nic_uio_shutdown(void) +{ + return 0; +} + +static int +nic_uio_modevent(module_t mod, int type, void *arg) +{ + + switch (type) { + case MOD_LOAD: + nic_uio_load(); + break; + case MOD_UNLOAD: + nic_uio_unload(); + break; + case MOD_SHUTDOWN: + nic_uio_shutdown(); + break; + default: + break; + } + + return 0; +} diff --git a/lib/librte_eal/common/Makefile b/lib/librte_eal/common/Makefile new file mode 100644 index 00000000..f5ea0ee8 --- /dev/null +++ b/lib/librte_eal/common/Makefile @@ -0,0 +1,61 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +INC := rte_branch_prediction.h rte_common.h +INC += rte_debug.h rte_eal.h rte_errno.h rte_launch.h rte_lcore.h +INC += rte_log.h rte_memory.h rte_memzone.h rte_pci.h +INC += rte_pci_dev_ids.h rte_per_lcore.h rte_random.h +INC += rte_tailq.h rte_interrupts.h rte_alarm.h +INC += rte_string_fns.h rte_version.h +INC += rte_eal_memconfig.h rte_malloc_heap.h +INC += rte_hexdump.h rte_devargs.h rte_dev.h +INC += rte_pci_dev_feature_defs.h rte_pci_dev_features.h +INC += rte_malloc.h rte_keepalive.h rte_time.h + +ifeq ($(CONFIG_RTE_INSECURE_FUNCTION_WARNING),y) +INC += rte_warnings.h +endif + +GENERIC_INC := rte_atomic.h rte_byteorder.h rte_cycles.h rte_prefetch.h +GENERIC_INC += rte_spinlock.h rte_memcpy.h rte_cpuflags.h rte_rwlock.h +# defined in mk/arch/$(RTE_ARCH)/rte.vars.mk +ARCH_DIR ?= $(RTE_ARCH) +ARCH_INC := $(notdir $(wildcard $(RTE_SDK)/lib/librte_eal/common/include/arch/$(ARCH_DIR)/*.h)) + +SYMLINK-$(CONFIG_RTE_LIBRTE_EAL)-include := $(addprefix include/,$(INC)) +SYMLINK-$(CONFIG_RTE_LIBRTE_EAL)-include += \ + $(addprefix include/arch/$(ARCH_DIR)/,$(ARCH_INC)) +SYMLINK-$(CONFIG_RTE_LIBRTE_EAL)-include/generic := \ + $(addprefix include/generic/,$(GENERIC_INC)) + +include $(RTE_SDK)/mk/rte.install.mk diff --git a/lib/librte_eal/common/arch/arm/rte_cpuflags.c b/lib/librte_eal/common/arch/arm/rte_cpuflags.c new file mode 100644 index 00000000..23240efd --- /dev/null +++ b/lib/librte_eal/common/arch/arm/rte_cpuflags.c @@ -0,0 +1,179 @@ +/* + * BSD LICENSE + * + * Copyright (C) Cavium networks Ltd. 2015. + * Copyright(c) 2015 RehiveTech. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium networks nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "rte_cpuflags.h" + +#include +#include +#include +#include +#include + +#ifndef AT_HWCAP +#define AT_HWCAP 16 +#endif + +#ifndef AT_HWCAP2 +#define AT_HWCAP2 26 +#endif + +#ifndef AT_PLATFORM +#define AT_PLATFORM 15 +#endif + +enum cpu_register_t { + REG_NONE = 0, + REG_HWCAP, + REG_HWCAP2, + REG_PLATFORM, + REG_MAX +}; + +typedef uint32_t hwcap_registers_t[REG_MAX]; + +/** + * Struct to hold a processor feature entry + */ +struct feature_entry { + uint32_t reg; + uint32_t bit; +#define CPU_FLAG_NAME_MAX_LEN 64 + char name[CPU_FLAG_NAME_MAX_LEN]; +}; + +#define FEAT_DEF(name, reg, bit) \ + [RTE_CPUFLAG_##name] = {reg, bit, #name}, + +#ifdef RTE_ARCH_ARMv7 +#define PLATFORM_STR "v7l" +typedef Elf32_auxv_t _Elfx_auxv_t; + +const struct feature_entry rte_cpu_feature_table[] = { + FEAT_DEF(SWP, REG_HWCAP, 0) + FEAT_DEF(HALF, REG_HWCAP, 1) + FEAT_DEF(THUMB, REG_HWCAP, 2) + FEAT_DEF(A26BIT, REG_HWCAP, 3) + FEAT_DEF(FAST_MULT, REG_HWCAP, 4) + FEAT_DEF(FPA, REG_HWCAP, 5) + FEAT_DEF(VFP, REG_HWCAP, 6) + FEAT_DEF(EDSP, REG_HWCAP, 7) + FEAT_DEF(JAVA, REG_HWCAP, 8) + FEAT_DEF(IWMMXT, REG_HWCAP, 9) + FEAT_DEF(CRUNCH, REG_HWCAP, 10) + FEAT_DEF(THUMBEE, REG_HWCAP, 11) + FEAT_DEF(NEON, REG_HWCAP, 12) + FEAT_DEF(VFPv3, REG_HWCAP, 13) + FEAT_DEF(VFPv3D16, REG_HWCAP, 14) + FEAT_DEF(TLS, REG_HWCAP, 15) + FEAT_DEF(VFPv4, REG_HWCAP, 16) + FEAT_DEF(IDIVA, REG_HWCAP, 17) + FEAT_DEF(IDIVT, REG_HWCAP, 18) + FEAT_DEF(VFPD32, REG_HWCAP, 19) + FEAT_DEF(LPAE, REG_HWCAP, 20) + FEAT_DEF(EVTSTRM, REG_HWCAP, 21) + FEAT_DEF(AES, REG_HWCAP2, 0) + FEAT_DEF(PMULL, REG_HWCAP2, 1) + FEAT_DEF(SHA1, REG_HWCAP2, 2) + FEAT_DEF(SHA2, REG_HWCAP2, 3) + FEAT_DEF(CRC32, REG_HWCAP2, 4) + FEAT_DEF(V7L, REG_PLATFORM, 0) +}; + +#elif defined RTE_ARCH_ARM64 +#define PLATFORM_STR "aarch64" +typedef Elf64_auxv_t _Elfx_auxv_t; + +const struct feature_entry rte_cpu_feature_table[] = { + FEAT_DEF(FP, REG_HWCAP, 0) + FEAT_DEF(NEON, REG_HWCAP, 1) + FEAT_DEF(EVTSTRM, REG_HWCAP, 2) + FEAT_DEF(AES, REG_HWCAP, 3) + FEAT_DEF(PMULL, REG_HWCAP, 4) + FEAT_DEF(SHA1, REG_HWCAP, 5) + FEAT_DEF(SHA2, REG_HWCAP, 6) + FEAT_DEF(CRC32, REG_HWCAP, 7) + FEAT_DEF(ATOMICS, REG_HWCAP, 8) + FEAT_DEF(AARCH64, REG_PLATFORM, 1) +}; +#endif /* RTE_ARCH */ + +/* + * Read AUXV software register and get cpu features for ARM + */ +static void +rte_cpu_get_features(hwcap_registers_t out) +{ + int auxv_fd; + _Elfx_auxv_t auxv; + + auxv_fd = open("/proc/self/auxv", O_RDONLY); + assert(auxv_fd); + while (read(auxv_fd, &auxv, sizeof(auxv)) == sizeof(auxv)) { + if (auxv.a_type == AT_HWCAP) { + out[REG_HWCAP] = auxv.a_un.a_val; + } else if (auxv.a_type == AT_HWCAP2) { + out[REG_HWCAP2] = auxv.a_un.a_val; + } else if (auxv.a_type == AT_PLATFORM) { + if (!strcmp((const char *)auxv.a_un.a_val, PLATFORM_STR)) + out[REG_PLATFORM] = 0x0001; + } + } +} + +/* + * Checks if a particular flag is available on current machine. + */ +int +rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature) +{ + const struct feature_entry *feat; + hwcap_registers_t regs = {0}; + + if (feature >= RTE_CPUFLAG_NUMFLAGS) + return -ENOENT; + + feat = &rte_cpu_feature_table[feature]; + if (feat->reg == REG_NONE) + return -EFAULT; + + rte_cpu_get_features(regs); + return (regs[feat->reg] >> feat->bit) & 1; +} + +const char * +rte_cpu_get_flag_name(enum rte_cpu_flag_t feature) +{ + if (feature >= RTE_CPUFLAG_NUMFLAGS) + return NULL; + return rte_cpu_feature_table[feature].name; +} diff --git a/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c b/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c new file mode 100644 index 00000000..a8147c86 --- /dev/null +++ b/lib/librte_eal/common/arch/ppc_64/rte_cpuflags.c @@ -0,0 +1,147 @@ +/* + * BSD LICENSE + * + * Copyright (C) IBM Corporation 2014. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of IBM Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "rte_cpuflags.h" + +#include +#include +#include +#include + +/* Symbolic values for the entries in the auxiliary table */ +#define AT_HWCAP 16 +#define AT_HWCAP2 26 + +/* software based registers */ +enum cpu_register_t { + REG_NONE = 0, + REG_HWCAP, + REG_HWCAP2, + REG_MAX +}; + +typedef uint32_t hwcap_registers_t[REG_MAX]; + +struct feature_entry { + uint32_t reg; + uint32_t bit; +#define CPU_FLAG_NAME_MAX_LEN 64 + char name[CPU_FLAG_NAME_MAX_LEN]; +}; + +#define FEAT_DEF(name, reg, bit) \ + [RTE_CPUFLAG_##name] = {reg, bit, #name}, + +const struct feature_entry rte_cpu_feature_table[] = { + FEAT_DEF(PPC_LE, REG_HWCAP, 0) + FEAT_DEF(TRUE_LE, REG_HWCAP, 1) + FEAT_DEF(PSERIES_PERFMON_COMPAT, REG_HWCAP, 6) + FEAT_DEF(VSX, REG_HWCAP, 7) + FEAT_DEF(ARCH_2_06, REG_HWCAP, 8) + FEAT_DEF(POWER6_EXT, REG_HWCAP, 9) + FEAT_DEF(DFP, REG_HWCAP, 10) + FEAT_DEF(PA6T, REG_HWCAP, 11) + FEAT_DEF(ARCH_2_05, REG_HWCAP, 12) + FEAT_DEF(ICACHE_SNOOP, REG_HWCAP, 13) + FEAT_DEF(SMT, REG_HWCAP, 14) + FEAT_DEF(BOOKE, REG_HWCAP, 15) + FEAT_DEF(CELLBE, REG_HWCAP, 16) + FEAT_DEF(POWER5_PLUS, REG_HWCAP, 17) + FEAT_DEF(POWER5, REG_HWCAP, 18) + FEAT_DEF(POWER4, REG_HWCAP, 19) + FEAT_DEF(NOTB, REG_HWCAP, 20) + FEAT_DEF(EFP_DOUBLE, REG_HWCAP, 21) + FEAT_DEF(EFP_SINGLE, REG_HWCAP, 22) + FEAT_DEF(SPE, REG_HWCAP, 23) + FEAT_DEF(UNIFIED_CACHE, REG_HWCAP, 24) + FEAT_DEF(4xxMAC, REG_HWCAP, 25) + FEAT_DEF(MMU, REG_HWCAP, 26) + FEAT_DEF(FPU, REG_HWCAP, 27) + FEAT_DEF(ALTIVEC, REG_HWCAP, 28) + FEAT_DEF(PPC601, REG_HWCAP, 29) + FEAT_DEF(PPC64, REG_HWCAP, 30) + FEAT_DEF(PPC32, REG_HWCAP, 31) + FEAT_DEF(TAR, REG_HWCAP2, 26) + FEAT_DEF(LSEL, REG_HWCAP2, 27) + FEAT_DEF(EBB, REG_HWCAP2, 28) + FEAT_DEF(DSCR, REG_HWCAP2, 29) + FEAT_DEF(HTM, REG_HWCAP2, 30) + FEAT_DEF(ARCH_2_07, REG_HWCAP2, 31) +}; + +/* + * Read AUXV software register and get cpu features for Power + */ +static void +rte_cpu_get_features(hwcap_registers_t out) +{ + int auxv_fd; + Elf64_auxv_t auxv; + + auxv_fd = open("/proc/self/auxv", O_RDONLY); + assert(auxv_fd); + while (read(auxv_fd, &auxv, + sizeof(Elf64_auxv_t)) == sizeof(Elf64_auxv_t)) { + if (auxv.a_type == AT_HWCAP) + out[REG_HWCAP] = auxv.a_un.a_val; + else if (auxv.a_type == AT_HWCAP2) + out[REG_HWCAP2] = auxv.a_un.a_val; + } +} + +/* + * Checks if a particular flag is available on current machine. + */ +int +rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature) +{ + const struct feature_entry *feat; + hwcap_registers_t regs = {0}; + + if (feature >= RTE_CPUFLAG_NUMFLAGS) + return -ENOENT; + + feat = &rte_cpu_feature_table[feature]; + if (feat->reg == REG_NONE) + return -EFAULT; + + rte_cpu_get_features(regs); + return (regs[feat->reg] >> feat->bit) & 1; +} + +const char * +rte_cpu_get_flag_name(enum rte_cpu_flag_t feature) +{ + if (feature >= RTE_CPUFLAG_NUMFLAGS) + return NULL; + return rte_cpu_feature_table[feature].name; +} diff --git a/lib/librte_eal/common/arch/tile/rte_cpuflags.c b/lib/librte_eal/common/arch/tile/rte_cpuflags.c new file mode 100644 index 00000000..a2b6c51a --- /dev/null +++ b/lib/librte_eal/common/arch/tile/rte_cpuflags.c @@ -0,0 +1,47 @@ +/* + * BSD LICENSE + * + * Copyright (C) EZchip Semiconductor Ltd. 2015. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of EZchip Semiconductor nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "rte_cpuflags.h" + +#include + +const struct feature_entry rte_cpu_feature_table[] = { +}; + +/* + * Checks if a particular flag is available on current machine. + */ +int +rte_cpu_get_flag_enabled(__attribute__((unused)) enum rte_cpu_flag_t feature) +{ + return -ENOENT; +} diff --git a/lib/librte_eal/common/arch/x86/rte_cpuflags.c b/lib/librte_eal/common/arch/x86/rte_cpuflags.c new file mode 100644 index 00000000..01382571 --- /dev/null +++ b/lib/librte_eal/common/arch/x86/rte_cpuflags.c @@ -0,0 +1,220 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "rte_cpuflags.h" + +#include +#include +#include + +enum cpu_register_t { + RTE_REG_EAX = 0, + RTE_REG_EBX, + RTE_REG_ECX, + RTE_REG_EDX, +}; + +typedef uint32_t cpuid_registers_t[4]; + +/** + * Struct to hold a processor feature entry + */ +struct feature_entry { + uint32_t leaf; /**< cpuid leaf */ + uint32_t subleaf; /**< cpuid subleaf */ + uint32_t reg; /**< cpuid register */ + uint32_t bit; /**< cpuid register bit */ +#define CPU_FLAG_NAME_MAX_LEN 64 + char name[CPU_FLAG_NAME_MAX_LEN]; /**< String for printing */ +}; + +#define FEAT_DEF(name, leaf, subleaf, reg, bit) \ + [RTE_CPUFLAG_##name] = {leaf, subleaf, reg, bit, #name }, + +const struct feature_entry rte_cpu_feature_table[] = { + FEAT_DEF(SSE3, 0x00000001, 0, RTE_REG_ECX, 0) + FEAT_DEF(PCLMULQDQ, 0x00000001, 0, RTE_REG_ECX, 1) + FEAT_DEF(DTES64, 0x00000001, 0, RTE_REG_ECX, 2) + FEAT_DEF(MONITOR, 0x00000001, 0, RTE_REG_ECX, 3) + FEAT_DEF(DS_CPL, 0x00000001, 0, RTE_REG_ECX, 4) + FEAT_DEF(VMX, 0x00000001, 0, RTE_REG_ECX, 5) + FEAT_DEF(SMX, 0x00000001, 0, RTE_REG_ECX, 6) + FEAT_DEF(EIST, 0x00000001, 0, RTE_REG_ECX, 7) + FEAT_DEF(TM2, 0x00000001, 0, RTE_REG_ECX, 8) + FEAT_DEF(SSSE3, 0x00000001, 0, RTE_REG_ECX, 9) + FEAT_DEF(CNXT_ID, 0x00000001, 0, RTE_REG_ECX, 10) + FEAT_DEF(FMA, 0x00000001, 0, RTE_REG_ECX, 12) + FEAT_DEF(CMPXCHG16B, 0x00000001, 0, RTE_REG_ECX, 13) + FEAT_DEF(XTPR, 0x00000001, 0, RTE_REG_ECX, 14) + FEAT_DEF(PDCM, 0x00000001, 0, RTE_REG_ECX, 15) + FEAT_DEF(PCID, 0x00000001, 0, RTE_REG_ECX, 17) + FEAT_DEF(DCA, 0x00000001, 0, RTE_REG_ECX, 18) + FEAT_DEF(SSE4_1, 0x00000001, 0, RTE_REG_ECX, 19) + FEAT_DEF(SSE4_2, 0x00000001, 0, RTE_REG_ECX, 20) + FEAT_DEF(X2APIC, 0x00000001, 0, RTE_REG_ECX, 21) + FEAT_DEF(MOVBE, 0x00000001, 0, RTE_REG_ECX, 22) + FEAT_DEF(POPCNT, 0x00000001, 0, RTE_REG_ECX, 23) + FEAT_DEF(TSC_DEADLINE, 0x00000001, 0, RTE_REG_ECX, 24) + FEAT_DEF(AES, 0x00000001, 0, RTE_REG_ECX, 25) + FEAT_DEF(XSAVE, 0x00000001, 0, RTE_REG_ECX, 26) + FEAT_DEF(OSXSAVE, 0x00000001, 0, RTE_REG_ECX, 27) + FEAT_DEF(AVX, 0x00000001, 0, RTE_REG_ECX, 28) + FEAT_DEF(F16C, 0x00000001, 0, RTE_REG_ECX, 29) + FEAT_DEF(RDRAND, 0x00000001, 0, RTE_REG_ECX, 30) + + FEAT_DEF(FPU, 0x00000001, 0, RTE_REG_EDX, 0) + FEAT_DEF(VME, 0x00000001, 0, RTE_REG_EDX, 1) + FEAT_DEF(DE, 0x00000001, 0, RTE_REG_EDX, 2) + FEAT_DEF(PSE, 0x00000001, 0, RTE_REG_EDX, 3) + FEAT_DEF(TSC, 0x00000001, 0, RTE_REG_EDX, 4) + FEAT_DEF(MSR, 0x00000001, 0, RTE_REG_EDX, 5) + FEAT_DEF(PAE, 0x00000001, 0, RTE_REG_EDX, 6) + FEAT_DEF(MCE, 0x00000001, 0, RTE_REG_EDX, 7) + FEAT_DEF(CX8, 0x00000001, 0, RTE_REG_EDX, 8) + FEAT_DEF(APIC, 0x00000001, 0, RTE_REG_EDX, 9) + FEAT_DEF(SEP, 0x00000001, 0, RTE_REG_EDX, 11) + FEAT_DEF(MTRR, 0x00000001, 0, RTE_REG_EDX, 12) + FEAT_DEF(PGE, 0x00000001, 0, RTE_REG_EDX, 13) + FEAT_DEF(MCA, 0x00000001, 0, RTE_REG_EDX, 14) + FEAT_DEF(CMOV, 0x00000001, 0, RTE_REG_EDX, 15) + FEAT_DEF(PAT, 0x00000001, 0, RTE_REG_EDX, 16) + FEAT_DEF(PSE36, 0x00000001, 0, RTE_REG_EDX, 17) + FEAT_DEF(PSN, 0x00000001, 0, RTE_REG_EDX, 18) + FEAT_DEF(CLFSH, 0x00000001, 0, RTE_REG_EDX, 19) + FEAT_DEF(DS, 0x00000001, 0, RTE_REG_EDX, 21) + FEAT_DEF(ACPI, 0x00000001, 0, RTE_REG_EDX, 22) + FEAT_DEF(MMX, 0x00000001, 0, RTE_REG_EDX, 23) + FEAT_DEF(FXSR, 0x00000001, 0, RTE_REG_EDX, 24) + FEAT_DEF(SSE, 0x00000001, 0, RTE_REG_EDX, 25) + FEAT_DEF(SSE2, 0x00000001, 0, RTE_REG_EDX, 26) + FEAT_DEF(SS, 0x00000001, 0, RTE_REG_EDX, 27) + FEAT_DEF(HTT, 0x00000001, 0, RTE_REG_EDX, 28) + FEAT_DEF(TM, 0x00000001, 0, RTE_REG_EDX, 29) + FEAT_DEF(PBE, 0x00000001, 0, RTE_REG_EDX, 31) + + FEAT_DEF(DIGTEMP, 0x00000006, 0, RTE_REG_EAX, 0) + FEAT_DEF(TRBOBST, 0x00000006, 0, RTE_REG_EAX, 1) + FEAT_DEF(ARAT, 0x00000006, 0, RTE_REG_EAX, 2) + FEAT_DEF(PLN, 0x00000006, 0, RTE_REG_EAX, 4) + FEAT_DEF(ECMD, 0x00000006, 0, RTE_REG_EAX, 5) + FEAT_DEF(PTM, 0x00000006, 0, RTE_REG_EAX, 6) + + FEAT_DEF(MPERF_APERF_MSR, 0x00000006, 0, RTE_REG_ECX, 0) + FEAT_DEF(ACNT2, 0x00000006, 0, RTE_REG_ECX, 1) + FEAT_DEF(ENERGY_EFF, 0x00000006, 0, RTE_REG_ECX, 3) + + FEAT_DEF(FSGSBASE, 0x00000007, 0, RTE_REG_EBX, 0) + FEAT_DEF(BMI1, 0x00000007, 0, RTE_REG_EBX, 2) + FEAT_DEF(HLE, 0x00000007, 0, RTE_REG_EBX, 4) + FEAT_DEF(AVX2, 0x00000007, 0, RTE_REG_EBX, 5) + FEAT_DEF(SMEP, 0x00000007, 0, RTE_REG_EBX, 6) + FEAT_DEF(BMI2, 0x00000007, 0, RTE_REG_EBX, 7) + FEAT_DEF(ERMS, 0x00000007, 0, RTE_REG_EBX, 8) + FEAT_DEF(INVPCID, 0x00000007, 0, RTE_REG_EBX, 10) + FEAT_DEF(RTM, 0x00000007, 0, RTE_REG_EBX, 11) + FEAT_DEF(AVX512F, 0x00000007, 0, RTE_REG_EBX, 16) + + FEAT_DEF(LAHF_SAHF, 0x80000001, 0, RTE_REG_ECX, 0) + FEAT_DEF(LZCNT, 0x80000001, 0, RTE_REG_ECX, 4) + + FEAT_DEF(SYSCALL, 0x80000001, 0, RTE_REG_EDX, 11) + FEAT_DEF(XD, 0x80000001, 0, RTE_REG_EDX, 20) + FEAT_DEF(1GB_PG, 0x80000001, 0, RTE_REG_EDX, 26) + FEAT_DEF(RDTSCP, 0x80000001, 0, RTE_REG_EDX, 27) + FEAT_DEF(EM64T, 0x80000001, 0, RTE_REG_EDX, 29) + + FEAT_DEF(INVTSC, 0x80000007, 0, RTE_REG_EDX, 8) +}; + +/* + * Execute CPUID instruction and get contents of a specific register + * + * This function, when compiled with GCC, will generate architecture-neutral + * code, as per GCC manual. + */ +static void +rte_cpu_get_features(uint32_t leaf, uint32_t subleaf, cpuid_registers_t out) +{ +#if defined(__i386__) && defined(__PIC__) + /* %ebx is a forbidden register if we compile with -fPIC or -fPIE */ + asm volatile("movl %%ebx,%0 ; cpuid ; xchgl %%ebx,%0" + : "=r" (out[RTE_REG_EBX]), + "=a" (out[RTE_REG_EAX]), + "=c" (out[RTE_REG_ECX]), + "=d" (out[RTE_REG_EDX]) + : "a" (leaf), "c" (subleaf)); +#else + asm volatile("cpuid" + : "=a" (out[RTE_REG_EAX]), + "=b" (out[RTE_REG_EBX]), + "=c" (out[RTE_REG_ECX]), + "=d" (out[RTE_REG_EDX]) + : "a" (leaf), "c" (subleaf)); +#endif +} + +int +rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature) +{ + const struct feature_entry *feat; + cpuid_registers_t regs; + + if (feature >= RTE_CPUFLAG_NUMFLAGS) + /* Flag does not match anything in the feature tables */ + return -ENOENT; + + feat = &rte_cpu_feature_table[feature]; + + if (!feat->leaf) + /* This entry in the table wasn't filled out! */ + return -EFAULT; + + rte_cpu_get_features(feat->leaf & 0xffff0000, 0, regs); + if (((regs[RTE_REG_EAX] ^ feat->leaf) & 0xffff0000) || + regs[RTE_REG_EAX] < feat->leaf) + return 0; + + /* get the cpuid leaf containing the desired feature */ + rte_cpu_get_features(feat->leaf, feat->subleaf, regs); + + /* check if the feature is enabled */ + return (regs[feat->reg] >> feat->bit) & 1; +} + +const char * +rte_cpu_get_flag_name(enum rte_cpu_flag_t feature) +{ + if (feature >= RTE_CPUFLAG_NUMFLAGS) + return NULL; + return rte_cpu_feature_table[feature].name; +} diff --git a/lib/librte_eal/common/eal_common_cpuflags.c b/lib/librte_eal/common/eal_common_cpuflags.c new file mode 100644 index 00000000..ecb12409 --- /dev/null +++ b/lib/librte_eal/common/eal_common_cpuflags.c @@ -0,0 +1,76 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include +#include + +/** + * Checks if the machine is adequate for running the binary. If it is not, the + * program exits with status 1. + * The function attribute forces this function to be called before main(). But + * with ICC, the check is generated by the compiler. + */ +#ifndef __INTEL_COMPILER +void __attribute__ ((__constructor__)) +#else +void +#endif +rte_cpu_check_supported(void) +{ + /* This is generated at compile-time by the build system */ + static const enum rte_cpu_flag_t compile_time_flags[] = { + RTE_COMPILE_TIME_CPUFLAGS + }; + unsigned count = RTE_DIM(compile_time_flags), i; + int ret; + + for (i = 0; i < count; i++) { + ret = rte_cpu_get_flag_enabled(compile_time_flags[i]); + + if (ret < 0) { + fprintf(stderr, + "ERROR: CPU feature flag lookup failed with error %d\n", + ret); + exit(1); + } + if (!ret) { + fprintf(stderr, + "ERROR: This system does not support \"%s\".\n" + "Please check that RTE_MACHINE is set correctly.\n", + rte_cpu_get_flag_name(compile_time_flags[i])); + exit(1); + } + } +} diff --git a/lib/librte_eal/common/eal_common_dev.c b/lib/librte_eal/common/eal_common_dev.c new file mode 100644 index 00000000..a8a4146c --- /dev/null +++ b/lib/librte_eal/common/eal_common_dev.c @@ -0,0 +1,152 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * Copyright(c) 2014 6WIND S.A. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "eal_private.h" + +/** Global list of device drivers. */ +static struct rte_driver_list dev_driver_list = + TAILQ_HEAD_INITIALIZER(dev_driver_list); + +/* register a driver */ +void +rte_eal_driver_register(struct rte_driver *driver) +{ + TAILQ_INSERT_TAIL(&dev_driver_list, driver, next); +} + +/* unregister a driver */ +void +rte_eal_driver_unregister(struct rte_driver *driver) +{ + TAILQ_REMOVE(&dev_driver_list, driver, next); +} + +int +rte_eal_vdev_init(const char *name, const char *args) +{ + struct rte_driver *driver; + + if (name == NULL) + return -EINVAL; + + TAILQ_FOREACH(driver, &dev_driver_list, next) { + if (driver->type != PMD_VDEV) + continue; + + /* + * search a driver prefix in virtual device name. + * For example, if the driver is pcap PMD, driver->name + * will be "eth_pcap", but "name" will be "eth_pcapN". + * So use strncmp to compare. + */ + if (!strncmp(driver->name, name, strlen(driver->name))) + return driver->init(name, args); + } + + RTE_LOG(ERR, EAL, "no driver found for %s\n", name); + return -EINVAL; +} + +int +rte_eal_dev_init(void) +{ + struct rte_devargs *devargs; + struct rte_driver *driver; + + /* + * Note that the dev_driver_list is populated here + * from calls made to rte_eal_driver_register from constructor functions + * embedded into PMD modules via the PMD_REGISTER_DRIVER macro + */ + + /* call the init function for each virtual device */ + TAILQ_FOREACH(devargs, &devargs_list, next) { + + if (devargs->type != RTE_DEVTYPE_VIRTUAL) + continue; + + if (rte_eal_vdev_init(devargs->virt.drv_name, + devargs->args)) { + RTE_LOG(ERR, EAL, "failed to initialize %s device\n", + devargs->virt.drv_name); + return -1; + } + } + + /* Once the vdevs are initalized, start calling all the pdev drivers */ + TAILQ_FOREACH(driver, &dev_driver_list, next) { + if (driver->type != PMD_PDEV) + continue; + /* PDEV drivers don't get passed any parameters */ + driver->init(NULL, NULL); + } + return 0; +} + +int +rte_eal_vdev_uninit(const char *name) +{ + struct rte_driver *driver; + + if (name == NULL) + return -EINVAL; + + TAILQ_FOREACH(driver, &dev_driver_list, next) { + if (driver->type != PMD_VDEV) + continue; + + /* + * search a driver prefix in virtual device name. + * For example, if the driver is pcap PMD, driver->name + * will be "eth_pcap", but "name" will be "eth_pcapN". + * So use strncmp to compare. + */ + if (!strncmp(driver->name, name, strlen(driver->name))) + return driver->uninit(name); + } + + RTE_LOG(ERR, EAL, "no driver found for %s\n", name); + return -EINVAL; +} diff --git a/lib/librte_eal/common/eal_common_devargs.c b/lib/librte_eal/common/eal_common_devargs.c new file mode 100644 index 00000000..2bfe54a1 --- /dev/null +++ b/lib/librte_eal/common/eal_common_devargs.c @@ -0,0 +1,176 @@ +/*- + * BSD LICENSE + * + * Copyright 2014 6WIND S.A. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A nor the names of its contributors + * may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* This file manages the list of devices and their arguments, as given + * by the user at startup + * + * Code here should not call rte_log since the EAL environment + * may not be initialized. + */ + +#include +#include + +#include +#include +#include "eal_private.h" + +/** Global list of user devices */ +struct rte_devargs_list devargs_list = + TAILQ_HEAD_INITIALIZER(devargs_list); + +int +rte_eal_parse_devargs_str(const char *devargs_str, + char **drvname, char **drvargs) +{ + char *sep; + + if ((devargs_str) == NULL || (drvname) == NULL || (drvargs == NULL)) + return -1; + + *drvname = strdup(devargs_str); + if (drvname == NULL) + return -1; + + /* set the first ',' to '\0' to split name and arguments */ + sep = strchr(*drvname, ','); + if (sep != NULL) { + sep[0] = '\0'; + *drvargs = strdup(sep + 1); + } else { + *drvargs = strdup(""); + } + + if (*drvargs == NULL) { + free(*drvname); + return -1; + } + return 0; +} + +/* store a whitelist parameter for later parsing */ +int +rte_eal_devargs_add(enum rte_devtype devtype, const char *devargs_str) +{ + struct rte_devargs *devargs = NULL; + char *buf = NULL; + int ret; + + /* use malloc instead of rte_malloc as it's called early at init */ + devargs = malloc(sizeof(*devargs)); + if (devargs == NULL) + goto fail; + + memset(devargs, 0, sizeof(*devargs)); + devargs->type = devtype; + + if (rte_eal_parse_devargs_str(devargs_str, &buf, &devargs->args)) + goto fail; + + switch (devargs->type) { + case RTE_DEVTYPE_WHITELISTED_PCI: + case RTE_DEVTYPE_BLACKLISTED_PCI: + /* try to parse pci identifier */ + if (eal_parse_pci_BDF(buf, &devargs->pci.addr) != 0 && + eal_parse_pci_DomBDF(buf, &devargs->pci.addr) != 0) + goto fail; + + break; + case RTE_DEVTYPE_VIRTUAL: + /* save driver name */ + ret = snprintf(devargs->virt.drv_name, + sizeof(devargs->virt.drv_name), "%s", buf); + if (ret < 0 || ret >= (int)sizeof(devargs->virt.drv_name)) + goto fail; + + break; + } + + free(buf); + TAILQ_INSERT_TAIL(&devargs_list, devargs, next); + return 0; + +fail: + free(buf); + if (devargs) { + free(devargs->args); + free(devargs); + } + + return -1; +} + +/* count the number of devices of a specified type */ +unsigned int +rte_eal_devargs_type_count(enum rte_devtype devtype) +{ + struct rte_devargs *devargs; + unsigned int count = 0; + + TAILQ_FOREACH(devargs, &devargs_list, next) { + if (devargs->type != devtype) + continue; + count++; + } + return count; +} + +/* dump the user devices on the console */ +void +rte_eal_devargs_dump(FILE *f) +{ + struct rte_devargs *devargs; + + fprintf(f, "User device white list:\n"); + TAILQ_FOREACH(devargs, &devargs_list, next) { + if (devargs->type == RTE_DEVTYPE_WHITELISTED_PCI) + fprintf(f, " PCI whitelist " PCI_PRI_FMT " %s\n", + devargs->pci.addr.domain, + devargs->pci.addr.bus, + devargs->pci.addr.devid, + devargs->pci.addr.function, + devargs->args); + else if (devargs->type == RTE_DEVTYPE_BLACKLISTED_PCI) + fprintf(f, " PCI blacklist " PCI_PRI_FMT " %s\n", + devargs->pci.addr.domain, + devargs->pci.addr.bus, + devargs->pci.addr.devid, + devargs->pci.addr.function, + devargs->args); + else if (devargs->type == RTE_DEVTYPE_VIRTUAL) + fprintf(f, " VIRTUAL %s %s\n", + devargs->virt.drv_name, + devargs->args); + else + fprintf(f, " UNKNOWN %s\n", devargs->args); + } +} diff --git a/lib/librte_eal/common/eal_common_errno.c b/lib/librte_eal/common/eal_common_errno.c new file mode 100644 index 00000000..de48d8e4 --- /dev/null +++ b/lib/librte_eal/common/eal_common_errno.c @@ -0,0 +1,72 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +RTE_DEFINE_PER_LCORE(int, _rte_errno); + +const char * +rte_strerror(int errnum) +{ +#define RETVAL_SZ 256 + static RTE_DEFINE_PER_LCORE(char[RETVAL_SZ], retval); + + /* since some implementations of strerror_r throw an error + * themselves if errnum is too big, we handle that case here */ + if (errnum > RTE_MAX_ERRNO) + snprintf(RTE_PER_LCORE(retval), RETVAL_SZ, +#ifdef RTE_EXEC_ENV_BSDAPP + "Unknown error: %d", errnum); +#else + "Unknown error %d", errnum); +#endif + else + switch (errnum){ + case E_RTE_SECONDARY: + return "Invalid call in secondary process"; + case E_RTE_NO_CONFIG: + return "Missing rte_config structure"; + default: + strerror_r(errnum, RTE_PER_LCORE(retval), RETVAL_SZ); + } + + return RTE_PER_LCORE(retval); +} diff --git a/lib/librte_eal/common/eal_common_hexdump.c b/lib/librte_eal/common/eal_common_hexdump.c new file mode 100644 index 00000000..d5cbd703 --- /dev/null +++ b/lib/librte_eal/common/eal_common_hexdump.c @@ -0,0 +1,120 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include +#include +#include +#include +#include + +#define LINE_LEN 128 + +/**************************************************************************//** +* +* rte_hexdump - Dump out memory in a special hex dump format. +* +* DESCRIPTION +* Dump out the message buffer in a special hex dump output format with characters +* printed for each line of 16 hex values. +* +* RETURNS: N/A +* +* SEE ALSO: +*/ + +void +rte_hexdump(FILE *f, const char * title, const void * buf, unsigned int len) +{ + unsigned int i, out, ofs; + const unsigned char *data = buf; + char line[LINE_LEN]; /* space needed 8+16*3+3+16 == 75 */ + + fprintf(f, "%s at [%p], len=%u\n", (title)? title : " Dump data", data, len); + ofs = 0; + while (ofs < len) { + /* format the line in the buffer, then use printf to output to screen */ + out = snprintf(line, LINE_LEN, "%08X:", ofs); + for (i = 0; ((ofs + i) < len) && (i < 16); i++) + out += snprintf(line+out, LINE_LEN - out, " %02X", (data[ofs+i] & 0xff)); + for(; i <= 16; i++) + out += snprintf(line+out, LINE_LEN - out, " | "); + for(i = 0; (ofs < len) && (i < 16); i++, ofs++) { + unsigned char c = data[ofs]; + if ( (c < ' ') || (c > '~')) + c = '.'; + out += snprintf(line+out, LINE_LEN - out, "%c", c); + } + fprintf(f, "%s\n", line); + } + fflush(f); +} + +/**************************************************************************//** +* +* rte_memdump - Dump out memory in hex bytes with colons. +* +* DESCRIPTION +* Dump out the message buffer in hex bytes with colons xx:xx:xx:xx:... +* +* RETURNS: N/A +* +* SEE ALSO: +*/ + +void +rte_memdump(FILE *f, const char * title, const void * buf, unsigned int len) +{ + unsigned int i, out; + const unsigned char *data = buf; + char line[LINE_LEN]; + + if ( title ) + fprintf(f, "%s: ", title); + + line[0] = '\0'; + for (i = 0, out = 0; i < len; i++) { + // Make sure we do not overrun the line buffer length. + if ( out >= (LINE_LEN - 4) ) { + fprintf(f, "%s", line); + out = 0; + line[out] = '\0'; + } + out += snprintf(line+out, LINE_LEN - out, "%02x%s", + (data[i] & 0xff), ((i+1) < len)? ":" : ""); + } + if ( out > 0 ) + fprintf(f, "%s", line); + fprintf(f, "\n"); + + fflush(f); +} diff --git a/lib/librte_eal/common/eal_common_launch.c b/lib/librte_eal/common/eal_common_launch.c new file mode 100644 index 00000000..229c3a03 --- /dev/null +++ b/lib/librte_eal/common/eal_common_launch.c @@ -0,0 +1,118 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +/* + * Wait until a lcore finished its job. + */ +int +rte_eal_wait_lcore(unsigned slave_id) +{ + if (lcore_config[slave_id].state == WAIT) + return 0; + + while (lcore_config[slave_id].state != WAIT && + lcore_config[slave_id].state != FINISHED); + + rte_rmb(); + + /* we are in finished state, go to wait state */ + lcore_config[slave_id].state = WAIT; + return lcore_config[slave_id].ret; +} + +/* + * Check that every SLAVE lcores are in WAIT state, then call + * rte_eal_remote_launch() for all of them. If call_master is true + * (set to CALL_MASTER), also call the function on the master lcore. + */ +int +rte_eal_mp_remote_launch(int (*f)(void *), void *arg, + enum rte_rmt_call_master_t call_master) +{ + int lcore_id; + int master = rte_get_master_lcore(); + + /* check state of lcores */ + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + if (lcore_config[lcore_id].state != WAIT) + return -EBUSY; + } + + /* send messages to cores */ + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + rte_eal_remote_launch(f, arg, lcore_id); + } + + if (call_master == CALL_MASTER) { + lcore_config[master].ret = f(arg); + lcore_config[master].state = FINISHED; + } + + return 0; +} + +/* + * Return the state of the lcore identified by slave_id. + */ +enum rte_lcore_state_t +rte_eal_get_lcore_state(unsigned lcore_id) +{ + return lcore_config[lcore_id].state; +} + +/* + * Do a rte_eal_wait_lcore() for every lcore. The return values are + * ignored. + */ +void +rte_eal_mp_wait_lcore(void) +{ + unsigned lcore_id; + + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + rte_eal_wait_lcore(lcore_id); + } +} diff --git a/lib/librte_eal/common/eal_common_lcore.c b/lib/librte_eal/common/eal_common_lcore.c new file mode 100644 index 00000000..a4263ba5 --- /dev/null +++ b/lib/librte_eal/common/eal_common_lcore.c @@ -0,0 +1,110 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "eal_private.h" +#include "eal_thread.h" + +/* + * Parse /sys/devices/system/cpu to get the number of physical and logical + * processors on the machine. The function will fill the cpu_info + * structure. + */ +int +rte_eal_cpu_init(void) +{ + /* pointer to global configuration */ + struct rte_config *config = rte_eal_get_configuration(); + unsigned lcore_id; + unsigned count = 0; + + /* + * Parse the maximum set of logical cores, detect the subset of running + * ones and enable them by default. + */ + for (lcore_id = 0; lcore_id < RTE_MAX_LCORE; lcore_id++) { + lcore_config[lcore_id].core_index = count; + + /* init cpuset for per lcore config */ + CPU_ZERO(&lcore_config[lcore_id].cpuset); + + /* in 1:1 mapping, record related cpu detected state */ + lcore_config[lcore_id].detected = eal_cpu_detected(lcore_id); + if (lcore_config[lcore_id].detected == 0) { + config->lcore_role[lcore_id] = ROLE_OFF; + lcore_config[lcore_id].core_index = -1; + continue; + } + + /* By default, lcore 1:1 map to cpu id */ + CPU_SET(lcore_id, &lcore_config[lcore_id].cpuset); + + /* By default, each detected core is enabled */ + config->lcore_role[lcore_id] = ROLE_RTE; + lcore_config[lcore_id].core_id = eal_cpu_core_id(lcore_id); + lcore_config[lcore_id].socket_id = eal_cpu_socket_id(lcore_id); + if (lcore_config[lcore_id].socket_id >= RTE_MAX_NUMA_NODES) +#ifdef RTE_EAL_ALLOW_INV_SOCKET_ID + lcore_config[lcore_id].socket_id = 0; +#else + rte_panic("Socket ID (%u) is greater than " + "RTE_MAX_NUMA_NODES (%d)\n", + lcore_config[lcore_id].socket_id, + RTE_MAX_NUMA_NODES); +#endif + + RTE_LOG(DEBUG, EAL, "Detected lcore %u as " + "core %u on socket %u\n", + lcore_id, lcore_config[lcore_id].core_id, + lcore_config[lcore_id].socket_id); + count++; + } + /* Set the count of enabled logical cores of the EAL configuration */ + config->lcore_count = count; + RTE_LOG(DEBUG, EAL, + "Support maximum %u logical core(s) by configuration.\n", + RTE_MAX_LCORE); + RTE_LOG(DEBUG, EAL, "Detected %u lcore(s)\n", config->lcore_count); + + return 0; +} diff --git a/lib/librte_eal/common/eal_common_log.c b/lib/librte_eal/common/eal_common_log.c new file mode 100644 index 00000000..1ae8de70 --- /dev/null +++ b/lib/librte_eal/common/eal_common_log.c @@ -0,0 +1,337 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "eal_private.h" + +#define LOG_ELT_SIZE 2048 + +#define LOG_HISTORY_MP_NAME "log_history" + +STAILQ_HEAD(log_history_list, log_history); + +/** + * The structure of a message log in the log history. + */ +struct log_history { + STAILQ_ENTRY(log_history) next; + unsigned size; + char buf[0]; +}; + +static struct rte_mempool *log_history_mp = NULL; +static unsigned log_history_size = 0; +static struct log_history_list log_history; + +/* global log structure */ +struct rte_logs rte_logs = { + .type = ~0, + .level = RTE_LOG_DEBUG, + .file = NULL, +}; + +static rte_spinlock_t log_dump_lock = RTE_SPINLOCK_INITIALIZER; +static rte_spinlock_t log_list_lock = RTE_SPINLOCK_INITIALIZER; +static FILE *default_log_stream; +static int history_enabled = 1; + +/** + * This global structure stores some informations about the message + * that is currently beeing processed by one lcore + */ +struct log_cur_msg { + uint32_t loglevel; /**< log level - see rte_log.h */ + uint32_t logtype; /**< log type - see rte_log.h */ +} __rte_cache_aligned; +static struct log_cur_msg log_cur_msg[RTE_MAX_LCORE]; /**< per core log */ + + +/* default logs */ + +int +rte_log_add_in_history(const char *buf, size_t size) +{ + struct log_history *hist_buf = NULL; + static const unsigned hist_buf_size = LOG_ELT_SIZE - sizeof(*hist_buf); + void *obj; + + if (history_enabled == 0) + return 0; + + rte_spinlock_lock(&log_list_lock); + + /* get a buffer for adding in history */ + if (log_history_size > RTE_LOG_HISTORY) { + hist_buf = STAILQ_FIRST(&log_history); + if (hist_buf) { + STAILQ_REMOVE_HEAD(&log_history, next); + log_history_size--; + } + } + else { + if (rte_mempool_mc_get(log_history_mp, &obj) < 0) + obj = NULL; + hist_buf = obj; + } + + /* no buffer */ + if (hist_buf == NULL) { + rte_spinlock_unlock(&log_list_lock); + return -ENOBUFS; + } + + /* not enough room for msg, buffer go back in mempool */ + if (size >= hist_buf_size) { + rte_mempool_mp_put(log_history_mp, hist_buf); + rte_spinlock_unlock(&log_list_lock); + return -ENOBUFS; + } + + /* add in history */ + memcpy(hist_buf->buf, buf, size); + hist_buf->buf[size] = hist_buf->buf[hist_buf_size-1] = '\0'; + hist_buf->size = size; + STAILQ_INSERT_TAIL(&log_history, hist_buf, next); + log_history_size++; + rte_spinlock_unlock(&log_list_lock); + + return 0; +} + +void +rte_log_set_history(int enable) +{ + history_enabled = enable; +} + +/* Change the stream that will be used by logging system */ +int +rte_openlog_stream(FILE *f) +{ + if (f == NULL) + rte_logs.file = default_log_stream; + else + rte_logs.file = f; + return 0; +} + +/* Set global log level */ +void +rte_set_log_level(uint32_t level) +{ + rte_logs.level = (uint32_t)level; +} + +/* Get global log level */ +uint32_t +rte_get_log_level(void) +{ + return rte_logs.level; +} + +/* Set global log type */ +void +rte_set_log_type(uint32_t type, int enable) +{ + if (enable) + rte_logs.type |= type; + else + rte_logs.type &= (~type); +} + +/* Get global log type */ +uint32_t +rte_get_log_type(void) +{ + return rte_logs.type; +} + +/* get the current loglevel for the message beeing processed */ +int rte_log_cur_msg_loglevel(void) +{ + unsigned lcore_id; + lcore_id = rte_lcore_id(); + if (lcore_id >= RTE_MAX_LCORE) + return rte_get_log_level(); + return log_cur_msg[lcore_id].loglevel; +} + +/* get the current logtype for the message beeing processed */ +int rte_log_cur_msg_logtype(void) +{ + unsigned lcore_id; + lcore_id = rte_lcore_id(); + if (lcore_id >= RTE_MAX_LCORE) + return rte_get_log_type(); + return log_cur_msg[lcore_id].logtype; +} + +/* Dump log history to file */ +void +rte_log_dump_history(FILE *out) +{ + struct log_history_list tmp_log_history; + struct log_history *hist_buf; + unsigned i; + + /* only one dump at a time */ + rte_spinlock_lock(&log_dump_lock); + + /* save list, and re-init to allow logging during dump */ + rte_spinlock_lock(&log_list_lock); + tmp_log_history = log_history; + STAILQ_INIT(&log_history); + log_history_size = 0; + rte_spinlock_unlock(&log_list_lock); + + for (i=0; ibuf, hist_buf->size, 1, out) == 0) { + rte_mempool_mp_put(log_history_mp, hist_buf); + break; + } + + /* put back message structure in pool */ + rte_mempool_mp_put(log_history_mp, hist_buf); + } + fflush(out); + + rte_spinlock_unlock(&log_dump_lock); +} + +/* + * Generates a log message The message will be sent in the stream + * defined by the previous call to rte_openlog_stream(). + */ +int +rte_vlog(uint32_t level, uint32_t logtype, const char *format, va_list ap) +{ + int ret; + FILE *f = rte_logs.file; + unsigned lcore_id; + + if ((level > rte_logs.level) || !(logtype & rte_logs.type)) + return 0; + + /* save loglevel and logtype in a global per-lcore variable */ + lcore_id = rte_lcore_id(); + if (lcore_id < RTE_MAX_LCORE) { + log_cur_msg[lcore_id].loglevel = level; + log_cur_msg[lcore_id].logtype = logtype; + } + + ret = vfprintf(f, format, ap); + fflush(f); + return ret; +} + +/* + * Generates a log message The message will be sent in the stream + * defined by the previous call to rte_openlog_stream(). + * No need to check level here, done by rte_vlog(). + */ +int +rte_log(uint32_t level, uint32_t logtype, const char *format, ...) +{ + va_list ap; + int ret; + + va_start(ap, format); + ret = rte_vlog(level, logtype, format, ap); + va_end(ap); + return ret; +} + +/* + * called by environment-specific log init function to initialize log + * history + */ +int +rte_eal_common_log_init(FILE *default_log) +{ + STAILQ_INIT(&log_history); + + /* reserve RTE_LOG_HISTORY*2 elements, so we can dump and + * keep logging during this time */ + log_history_mp = rte_mempool_create(LOG_HISTORY_MP_NAME, RTE_LOG_HISTORY*2, + LOG_ELT_SIZE, 0, 0, + NULL, NULL, + NULL, NULL, + SOCKET_ID_ANY, 0); + + if ((log_history_mp == NULL) && + ((log_history_mp = rte_mempool_lookup(LOG_HISTORY_MP_NAME)) == NULL)){ + RTE_LOG(ERR, EAL, "%s(): cannot create log_history mempool\n", + __func__); + return -1; + } + + default_log_stream = default_log; + rte_openlog_stream(default_log); + return 0; +} diff --git a/lib/librte_eal/common/eal_common_memory.c b/lib/librte_eal/common/eal_common_memory.c new file mode 100644 index 00000000..6155752e --- /dev/null +++ b/lib/librte_eal/common/eal_common_memory.c @@ -0,0 +1,154 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "eal_private.h" +#include "eal_internal_cfg.h" + +/* + * Return a pointer to a read-only table of struct rte_physmem_desc + * elements, containing the layout of all addressable physical + * memory. The last element of the table contains a NULL address. + */ +const struct rte_memseg * +rte_eal_get_physmem_layout(void) +{ + return rte_eal_get_configuration()->mem_config->memseg; +} + + +/* get the total size of memory */ +uint64_t +rte_eal_get_physmem_size(void) +{ + const struct rte_mem_config *mcfg; + unsigned i = 0; + uint64_t total_len = 0; + + /* get pointer to global configuration */ + mcfg = rte_eal_get_configuration()->mem_config; + + for (i = 0; i < RTE_MAX_MEMSEG; i++) { + if (mcfg->memseg[i].addr == NULL) + break; + + total_len += mcfg->memseg[i].len; + } + + return total_len; +} + +/* Dump the physical memory layout on console */ +void +rte_dump_physmem_layout(FILE *f) +{ + const struct rte_mem_config *mcfg; + unsigned i = 0; + + /* get pointer to global configuration */ + mcfg = rte_eal_get_configuration()->mem_config; + + for (i = 0; i < RTE_MAX_MEMSEG; i++) { + if (mcfg->memseg[i].addr == NULL) + break; + + fprintf(f, "Segment %u: phys:0x%"PRIx64", len:%zu, " + "virt:%p, socket_id:%"PRId32", " + "hugepage_sz:%"PRIu64", nchannel:%"PRIx32", " + "nrank:%"PRIx32"\n", i, + mcfg->memseg[i].phys_addr, + mcfg->memseg[i].len, + mcfg->memseg[i].addr, + mcfg->memseg[i].socket_id, + mcfg->memseg[i].hugepage_sz, + mcfg->memseg[i].nchannel, + mcfg->memseg[i].nrank); + } +} + +/* return the number of memory channels */ +unsigned rte_memory_get_nchannel(void) +{ + return rte_eal_get_configuration()->mem_config->nchannel; +} + +/* return the number of memory rank */ +unsigned rte_memory_get_nrank(void) +{ + return rte_eal_get_configuration()->mem_config->nrank; +} + +static int +rte_eal_memdevice_init(void) +{ + struct rte_config *config; + + if (rte_eal_process_type() == RTE_PROC_SECONDARY) + return 0; + + config = rte_eal_get_configuration(); + config->mem_config->nchannel = internal_config.force_nchannel; + config->mem_config->nrank = internal_config.force_nrank; + + return 0; +} + +/* init memory subsystem */ +int +rte_eal_memory_init(void) +{ + RTE_LOG(DEBUG, EAL, "Setting up physically contiguous memory...\n"); + + const int retval = rte_eal_process_type() == RTE_PROC_PRIMARY ? + rte_eal_hugepage_init() : + rte_eal_hugepage_attach(); + if (retval < 0) + return -1; + + if (internal_config.no_shconf == 0 && rte_eal_memdevice_init() < 0) + return -1; + + return 0; +} diff --git a/lib/librte_eal/common/eal_common_memzone.c b/lib/librte_eal/common/eal_common_memzone.c new file mode 100644 index 00000000..711c8457 --- /dev/null +++ b/lib/librte_eal/common/eal_common_memzone.c @@ -0,0 +1,445 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "malloc_heap.h" +#include "malloc_elem.h" +#include "eal_private.h" + +static inline const struct rte_memzone * +memzone_lookup_thread_unsafe(const char *name) +{ + const struct rte_mem_config *mcfg; + const struct rte_memzone *mz; + unsigned i = 0; + + /* get pointer to global configuration */ + mcfg = rte_eal_get_configuration()->mem_config; + + /* + * the algorithm is not optimal (linear), but there are few + * zones and this function should be called at init only + */ + for (i = 0; i < RTE_MAX_MEMZONE; i++) { + mz = &mcfg->memzone[i]; + if (mz->addr != NULL && !strncmp(name, mz->name, RTE_MEMZONE_NAMESIZE)) + return &mcfg->memzone[i]; + } + + return NULL; +} + +static inline struct rte_memzone * +get_next_free_memzone(void) +{ + struct rte_mem_config *mcfg; + unsigned i = 0; + + /* get pointer to global configuration */ + mcfg = rte_eal_get_configuration()->mem_config; + + for (i = 0; i < RTE_MAX_MEMZONE; i++) { + if (mcfg->memzone[i].addr == NULL) + return &mcfg->memzone[i]; + } + + return NULL; +} + +/* This function will return the greatest free block if a heap has been + * specified. If no heap has been specified, it will return the heap and + * length of the greatest free block available in all heaps */ +static size_t +find_heap_max_free_elem(int *s, unsigned align) +{ + struct rte_mem_config *mcfg; + struct rte_malloc_socket_stats stats; + int i, socket = *s; + size_t len = 0; + + /* get pointer to global configuration */ + mcfg = rte_eal_get_configuration()->mem_config; + + for (i = 0; i < RTE_MAX_NUMA_NODES; i++) { + if ((socket != SOCKET_ID_ANY) && (socket != i)) + continue; + + malloc_heap_get_stats(&mcfg->malloc_heaps[i], &stats); + if (stats.greatest_free_size > len) { + len = stats.greatest_free_size; + *s = i; + } + } + + return len - MALLOC_ELEM_OVERHEAD - align; +} + +static const struct rte_memzone * +memzone_reserve_aligned_thread_unsafe(const char *name, size_t len, + int socket_id, unsigned flags, unsigned align, unsigned bound) +{ + struct rte_mem_config *mcfg; + size_t requested_len; + int socket, i; + + /* get pointer to global configuration */ + mcfg = rte_eal_get_configuration()->mem_config; + + /* no more room in config */ + if (mcfg->memzone_cnt >= RTE_MAX_MEMZONE) { + RTE_LOG(ERR, EAL, "%s(): No more room in config\n", __func__); + rte_errno = ENOSPC; + return NULL; + } + + /* zone already exist */ + if ((memzone_lookup_thread_unsafe(name)) != NULL) { + RTE_LOG(DEBUG, EAL, "%s(): memzone <%s> already exists\n", + __func__, name); + rte_errno = EEXIST; + return NULL; + } + + /* if alignment is not a power of two */ + if (align && !rte_is_power_of_2(align)) { + RTE_LOG(ERR, EAL, "%s(): Invalid alignment: %u\n", __func__, + align); + rte_errno = EINVAL; + return NULL; + } + + /* alignment less than cache size is not allowed */ + if (align < RTE_CACHE_LINE_SIZE) + align = RTE_CACHE_LINE_SIZE; + + /* align length on cache boundary. Check for overflow before doing so */ + if (len > SIZE_MAX - RTE_CACHE_LINE_MASK) { + rte_errno = EINVAL; /* requested size too big */ + return NULL; + } + + len += RTE_CACHE_LINE_MASK; + len &= ~((size_t) RTE_CACHE_LINE_MASK); + + /* save minimal requested length */ + requested_len = RTE_MAX((size_t)RTE_CACHE_LINE_SIZE, len); + + /* check that boundary condition is valid */ + if (bound != 0 && (requested_len > bound || !rte_is_power_of_2(bound))) { + rte_errno = EINVAL; + return NULL; + } + + if ((socket_id != SOCKET_ID_ANY) && (socket_id >= RTE_MAX_NUMA_NODES)) { + rte_errno = EINVAL; + return NULL; + } + + if (!rte_eal_has_hugepages()) + socket_id = SOCKET_ID_ANY; + + if (len == 0) { + if (bound != 0) + requested_len = bound; + else + requested_len = find_heap_max_free_elem(&socket_id, align); + } + + if (socket_id == SOCKET_ID_ANY) + socket = malloc_get_numa_socket(); + else + socket = socket_id; + + /* allocate memory on heap */ + void *mz_addr = malloc_heap_alloc(&mcfg->malloc_heaps[socket], NULL, + requested_len, flags, align, bound); + + if ((mz_addr == NULL) && (socket_id == SOCKET_ID_ANY)) { + /* try other heaps */ + for (i = 0; i < RTE_MAX_NUMA_NODES; i++) { + if (socket == i) + continue; + + mz_addr = malloc_heap_alloc(&mcfg->malloc_heaps[i], + NULL, requested_len, flags, align, bound); + if (mz_addr != NULL) + break; + } + } + + if (mz_addr == NULL) { + rte_errno = ENOMEM; + return NULL; + } + + const struct malloc_elem *elem = malloc_elem_from_data(mz_addr); + + /* fill the zone in config */ + struct rte_memzone *mz = get_next_free_memzone(); + + if (mz == NULL) { + RTE_LOG(ERR, EAL, "%s(): Cannot find free memzone but there is room " + "in config!\n", __func__); + rte_errno = ENOSPC; + return NULL; + } + + mcfg->memzone_cnt++; + snprintf(mz->name, sizeof(mz->name), "%s", name); + mz->phys_addr = rte_malloc_virt2phy(mz_addr); + mz->addr = mz_addr; + mz->len = (requested_len == 0 ? elem->size : requested_len); + mz->hugepage_sz = elem->ms->hugepage_sz; + mz->socket_id = elem->ms->socket_id; + mz->flags = 0; + mz->memseg_id = elem->ms - rte_eal_get_configuration()->mem_config->memseg; + + return mz; +} + +static const struct rte_memzone * +rte_memzone_reserve_thread_safe(const char *name, size_t len, + int socket_id, unsigned flags, unsigned align, + unsigned bound) +{ + struct rte_mem_config *mcfg; + const struct rte_memzone *mz = NULL; + + /* get pointer to global configuration */ + mcfg = rte_eal_get_configuration()->mem_config; + + rte_rwlock_write_lock(&mcfg->mlock); + + mz = memzone_reserve_aligned_thread_unsafe( + name, len, socket_id, flags, align, bound); + + rte_rwlock_write_unlock(&mcfg->mlock); + + return mz; +} + +/* + * Return a pointer to a correctly filled memzone descriptor (with a + * specified alignment and boundary). If the allocation cannot be done, + * return NULL. + */ +const struct rte_memzone * +rte_memzone_reserve_bounded(const char *name, size_t len, int socket_id, + unsigned flags, unsigned align, unsigned bound) +{ + return rte_memzone_reserve_thread_safe(name, len, socket_id, flags, + align, bound); +} + +/* + * Return a pointer to a correctly filled memzone descriptor (with a + * specified alignment). If the allocation cannot be done, return NULL. + */ +const struct rte_memzone * +rte_memzone_reserve_aligned(const char *name, size_t len, int socket_id, + unsigned flags, unsigned align) +{ + return rte_memzone_reserve_thread_safe(name, len, socket_id, flags, + align, 0); +} + +/* + * Return a pointer to a correctly filled memzone descriptor. If the + * allocation cannot be done, return NULL. + */ +const struct rte_memzone * +rte_memzone_reserve(const char *name, size_t len, int socket_id, + unsigned flags) +{ + return rte_memzone_reserve_thread_safe(name, len, socket_id, + flags, RTE_CACHE_LINE_SIZE, 0); +} + +int +rte_memzone_free(const struct rte_memzone *mz) +{ + struct rte_mem_config *mcfg; + int ret = 0; + void *addr; + unsigned idx; + + if (mz == NULL) + return -EINVAL; + + mcfg = rte_eal_get_configuration()->mem_config; + + rte_rwlock_write_lock(&mcfg->mlock); + + idx = ((uintptr_t)mz - (uintptr_t)mcfg->memzone); + idx = idx / sizeof(struct rte_memzone); + + addr = mcfg->memzone[idx].addr; +#ifdef RTE_LIBRTE_IVSHMEM + /* + * If ioremap_addr is set, it's an IVSHMEM memzone and we cannot + * free it. + */ + if (mcfg->memzone[idx].ioremap_addr != 0) + ret = -EINVAL; +#endif + if (addr == NULL) + ret = -EINVAL; + else if (mcfg->memzone_cnt == 0) { + rte_panic("%s(): memzone address not NULL but memzone_cnt is 0!\n", + __func__); + } else { + memset(&mcfg->memzone[idx], 0, sizeof(mcfg->memzone[idx])); + mcfg->memzone_cnt--; + } + + rte_rwlock_write_unlock(&mcfg->mlock); + + rte_free(addr); + + return ret; +} + +/* + * Lookup for the memzone identified by the given name + */ +const struct rte_memzone * +rte_memzone_lookup(const char *name) +{ + struct rte_mem_config *mcfg; + const struct rte_memzone *memzone = NULL; + + mcfg = rte_eal_get_configuration()->mem_config; + + rte_rwlock_read_lock(&mcfg->mlock); + + memzone = memzone_lookup_thread_unsafe(name); + + rte_rwlock_read_unlock(&mcfg->mlock); + + return memzone; +} + +/* Dump all reserved memory zones on console */ +void +rte_memzone_dump(FILE *f) +{ + struct rte_mem_config *mcfg; + unsigned i = 0; + + /* get pointer to global configuration */ + mcfg = rte_eal_get_configuration()->mem_config; + + rte_rwlock_read_lock(&mcfg->mlock); + /* dump all zones */ + for (i=0; imemzone[i].addr == NULL) + break; + fprintf(f, "Zone %u: name:<%s>, phys:0x%"PRIx64", len:0x%zx" + ", virt:%p, socket_id:%"PRId32", flags:%"PRIx32"\n", i, + mcfg->memzone[i].name, + mcfg->memzone[i].phys_addr, + mcfg->memzone[i].len, + mcfg->memzone[i].addr, + mcfg->memzone[i].socket_id, + mcfg->memzone[i].flags); + } + rte_rwlock_read_unlock(&mcfg->mlock); +} + +/* + * Init the memzone subsystem + */ +int +rte_eal_memzone_init(void) +{ + struct rte_mem_config *mcfg; + const struct rte_memseg *memseg; + + /* get pointer to global configuration */ + mcfg = rte_eal_get_configuration()->mem_config; + + /* secondary processes don't need to initialise anything */ + if (rte_eal_process_type() == RTE_PROC_SECONDARY) + return 0; + + memseg = rte_eal_get_physmem_layout(); + if (memseg == NULL) { + RTE_LOG(ERR, EAL, "%s(): Cannot get physical layout\n", __func__); + return -1; + } + + rte_rwlock_write_lock(&mcfg->mlock); + + /* delete all zones */ + mcfg->memzone_cnt = 0; + memset(mcfg->memzone, 0, sizeof(mcfg->memzone)); + + rte_rwlock_write_unlock(&mcfg->mlock); + + return rte_eal_malloc_heap_init(); +} + +/* Walk all reserved memory zones */ +void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *), + void *arg) +{ + struct rte_mem_config *mcfg; + unsigned i; + + mcfg = rte_eal_get_configuration()->mem_config; + + rte_rwlock_read_lock(&mcfg->mlock); + for (i=0; imemzone[i].addr != NULL) + (*func)(&mcfg->memzone[i], arg); + } + rte_rwlock_read_unlock(&mcfg->mlock); +} diff --git a/lib/librte_eal/common/eal_common_options.c b/lib/librte_eal/common/eal_common_options.c new file mode 100644 index 00000000..2b418d52 --- /dev/null +++ b/lib/librte_eal/common/eal_common_options.c @@ -0,0 +1,1022 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * Copyright(c) 2014 6WIND S.A. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "eal_internal_cfg.h" +#include "eal_options.h" +#include "eal_filesystem.h" + +#define BITS_PER_HEX 4 + +const char +eal_short_options[] = + "b:" /* pci-blacklist */ + "c:" /* coremask */ + "d:" /* driver */ + "h" /* help */ + "l:" /* corelist */ + "m:" /* memory size */ + "n:" /* memory channels */ + "r:" /* memory ranks */ + "v" /* version */ + "w:" /* pci-whitelist */ + ; + +const struct option +eal_long_options[] = { + {OPT_BASE_VIRTADDR, 1, NULL, OPT_BASE_VIRTADDR_NUM }, + {OPT_CREATE_UIO_DEV, 0, NULL, OPT_CREATE_UIO_DEV_NUM }, + {OPT_FILE_PREFIX, 1, NULL, OPT_FILE_PREFIX_NUM }, + {OPT_HELP, 0, NULL, OPT_HELP_NUM }, + {OPT_HUGE_DIR, 1, NULL, OPT_HUGE_DIR_NUM }, + {OPT_HUGE_UNLINK, 0, NULL, OPT_HUGE_UNLINK_NUM }, + {OPT_LCORES, 1, NULL, OPT_LCORES_NUM }, + {OPT_LOG_LEVEL, 1, NULL, OPT_LOG_LEVEL_NUM }, + {OPT_MASTER_LCORE, 1, NULL, OPT_MASTER_LCORE_NUM }, + {OPT_NO_HPET, 0, NULL, OPT_NO_HPET_NUM }, + {OPT_NO_HUGE, 0, NULL, OPT_NO_HUGE_NUM }, + {OPT_NO_PCI, 0, NULL, OPT_NO_PCI_NUM }, + {OPT_NO_SHCONF, 0, NULL, OPT_NO_SHCONF_NUM }, + {OPT_PCI_BLACKLIST, 1, NULL, OPT_PCI_BLACKLIST_NUM }, + {OPT_PCI_WHITELIST, 1, NULL, OPT_PCI_WHITELIST_NUM }, + {OPT_PROC_TYPE, 1, NULL, OPT_PROC_TYPE_NUM }, + {OPT_SOCKET_MEM, 1, NULL, OPT_SOCKET_MEM_NUM }, + {OPT_SYSLOG, 1, NULL, OPT_SYSLOG_NUM }, + {OPT_VDEV, 1, NULL, OPT_VDEV_NUM }, + {OPT_VFIO_INTR, 1, NULL, OPT_VFIO_INTR_NUM }, + {OPT_VMWARE_TSC_MAP, 0, NULL, OPT_VMWARE_TSC_MAP_NUM }, + {OPT_XEN_DOM0, 0, NULL, OPT_XEN_DOM0_NUM }, + {0, 0, NULL, 0 } +}; + +TAILQ_HEAD(shared_driver_list, shared_driver); + +/* Definition for shared object drivers. */ +struct shared_driver { + TAILQ_ENTRY(shared_driver) next; + + char name[PATH_MAX]; + void* lib_handle; +}; + +/* List of external loadable drivers */ +static struct shared_driver_list solib_list = +TAILQ_HEAD_INITIALIZER(solib_list); + +/* Default path of external loadable drivers */ +static const char *default_solib_dir = RTE_EAL_PMD_PATH; + +static int master_lcore_parsed; +static int mem_parsed; + +void +eal_reset_internal_config(struct internal_config *internal_cfg) +{ + int i; + + internal_cfg->memory = 0; + internal_cfg->force_nrank = 0; + internal_cfg->force_nchannel = 0; + internal_cfg->hugefile_prefix = HUGEFILE_PREFIX_DEFAULT; + internal_cfg->hugepage_dir = NULL; + internal_cfg->force_sockets = 0; + /* zero out the NUMA config */ + for (i = 0; i < RTE_MAX_NUMA_NODES; i++) + internal_cfg->socket_mem[i] = 0; + /* zero out hugedir descriptors */ + for (i = 0; i < MAX_HUGEPAGE_SIZES; i++) + internal_cfg->hugepage_info[i].lock_descriptor = -1; + internal_cfg->base_virtaddr = 0; + + internal_cfg->syslog_facility = LOG_DAEMON; + /* default value from build option */ + internal_cfg->log_level = RTE_LOG_LEVEL; + + internal_cfg->xen_dom0_support = 0; + + /* if set to NONE, interrupt mode is determined automatically */ + internal_cfg->vfio_intr_mode = RTE_INTR_MODE_NONE; + +#ifdef RTE_LIBEAL_USE_HPET + internal_cfg->no_hpet = 0; +#else + internal_cfg->no_hpet = 1; +#endif + internal_cfg->vmware_tsc_map = 0; + internal_cfg->create_uio_dev = 0; +} + +static int +eal_plugin_add(const char *path) +{ + struct shared_driver *solib; + + solib = malloc(sizeof(*solib)); + if (solib == NULL) { + RTE_LOG(ERR, EAL, "malloc(solib) failed\n"); + return -1; + } + memset(solib, 0, sizeof(*solib)); + strncpy(solib->name, path, PATH_MAX-1); + solib->name[PATH_MAX-1] = 0; + TAILQ_INSERT_TAIL(&solib_list, solib, next); + + return 0; +} + +static int +eal_plugindir_init(const char *path) +{ + DIR *d = NULL; + struct dirent *dent = NULL; + char sopath[PATH_MAX]; + + if (path == NULL || *path == '\0') + return 0; + + d = opendir(path); + if (d == NULL) { + RTE_LOG(ERR, EAL, "failed to open directory %s: %s\n", + path, strerror(errno)); + return -1; + } + + while ((dent = readdir(d)) != NULL) { + struct stat sb; + + snprintf(sopath, PATH_MAX-1, "%s/%s", path, dent->d_name); + sopath[PATH_MAX-1] = 0; + + if (!(stat(sopath, &sb) == 0 && S_ISREG(sb.st_mode))) + continue; + + if (eal_plugin_add(sopath) == -1) + break; + } + + closedir(d); + /* XXX this ignores failures from readdir() itself */ + return (dent == NULL) ? 0 : -1; +} + +int +eal_plugins_init(void) +{ + struct shared_driver *solib = NULL; + + if (*default_solib_dir != '\0') + eal_plugin_add(default_solib_dir); + + TAILQ_FOREACH(solib, &solib_list, next) { + struct stat sb; + + if (stat(solib->name, &sb) == 0 && S_ISDIR(sb.st_mode)) { + if (eal_plugindir_init(solib->name) == -1) { + RTE_LOG(ERR, EAL, + "Cannot init plugin directory %s\n", + solib->name); + return -1; + } + } else { + RTE_LOG(DEBUG, EAL, "open shared lib %s\n", + solib->name); + solib->lib_handle = dlopen(solib->name, RTLD_NOW); + if (solib->lib_handle == NULL) { + RTE_LOG(ERR, EAL, "%s\n", dlerror()); + return -1; + } + } + + } + return 0; +} + +/* + * Parse the coremask given as argument (hexadecimal string) and fill + * the global configuration (core role and core count) with the parsed + * value. + */ +static int xdigit2val(unsigned char c) +{ + int val; + + if (isdigit(c)) + val = c - '0'; + else if (isupper(c)) + val = c - 'A' + 10; + else + val = c - 'a' + 10; + return val; +} + +static int +eal_parse_coremask(const char *coremask) +{ + struct rte_config *cfg = rte_eal_get_configuration(); + int i, j, idx = 0; + unsigned count = 0; + char c; + int val; + + if (coremask == NULL) + return -1; + /* Remove all blank characters ahead and after . + * Remove 0x/0X if exists. + */ + while (isblank(*coremask)) + coremask++; + if (coremask[0] == '0' && ((coremask[1] == 'x') + || (coremask[1] == 'X'))) + coremask += 2; + i = strlen(coremask); + while ((i > 0) && isblank(coremask[i - 1])) + i--; + if (i == 0) + return -1; + + for (i = i - 1; i >= 0 && idx < RTE_MAX_LCORE; i--) { + c = coremask[i]; + if (isxdigit(c) == 0) { + /* invalid characters */ + return -1; + } + val = xdigit2val(c); + for (j = 0; j < BITS_PER_HEX && idx < RTE_MAX_LCORE; j++, idx++) + { + if ((1 << j) & val) { + if (!lcore_config[idx].detected) { + RTE_LOG(ERR, EAL, "lcore %u " + "unavailable\n", idx); + return -1; + } + cfg->lcore_role[idx] = ROLE_RTE; + lcore_config[idx].core_index = count; + count++; + } else { + cfg->lcore_role[idx] = ROLE_OFF; + lcore_config[idx].core_index = -1; + } + } + } + for (; i >= 0; i--) + if (coremask[i] != '0') + return -1; + for (; idx < RTE_MAX_LCORE; idx++) { + cfg->lcore_role[idx] = ROLE_OFF; + lcore_config[idx].core_index = -1; + } + if (count == 0) + return -1; + /* Update the count of enabled logical cores of the EAL configuration */ + cfg->lcore_count = count; + return 0; +} + +static int +eal_parse_corelist(const char *corelist) +{ + struct rte_config *cfg = rte_eal_get_configuration(); + int i, idx = 0; + unsigned count = 0; + char *end = NULL; + int min, max; + + if (corelist == NULL) + return -1; + + /* Remove all blank characters ahead and after */ + while (isblank(*corelist)) + corelist++; + i = strlen(corelist); + while ((i > 0) && isblank(corelist[i - 1])) + i--; + + /* Reset config */ + for (idx = 0; idx < RTE_MAX_LCORE; idx++) { + cfg->lcore_role[idx] = ROLE_OFF; + lcore_config[idx].core_index = -1; + } + + /* Get list of cores */ + min = RTE_MAX_LCORE; + do { + while (isblank(*corelist)) + corelist++; + if (*corelist == '\0') + return -1; + errno = 0; + idx = strtoul(corelist, &end, 10); + if (errno || end == NULL) + return -1; + while (isblank(*end)) + end++; + if (*end == '-') { + min = idx; + } else if ((*end == ',') || (*end == '\0')) { + max = idx; + if (min == RTE_MAX_LCORE) + min = idx; + for (idx = min; idx <= max; idx++) { + if (cfg->lcore_role[idx] != ROLE_RTE) { + cfg->lcore_role[idx] = ROLE_RTE; + lcore_config[idx].core_index = count; + count++; + } + } + min = RTE_MAX_LCORE; + } else + return -1; + corelist = end + 1; + } while (*end != '\0'); + + if (count == 0) + return -1; + + /* Update the count of enabled logical cores of the EAL configuration */ + cfg->lcore_count = count; + + return 0; +} + +/* Changes the lcore id of the master thread */ +static int +eal_parse_master_lcore(const char *arg) +{ + char *parsing_end; + struct rte_config *cfg = rte_eal_get_configuration(); + + errno = 0; + cfg->master_lcore = (uint32_t) strtol(arg, &parsing_end, 0); + if (errno || parsing_end[0] != 0) + return -1; + if (cfg->master_lcore >= RTE_MAX_LCORE) + return -1; + master_lcore_parsed = 1; + return 0; +} + +/* + * Parse elem, the elem could be single number/range or '(' ')' group + * 1) A single number elem, it's just a simple digit. e.g. 9 + * 2) A single range elem, two digits with a '-' between. e.g. 2-6 + * 3) A group elem, combines multiple 1) or 2) with '( )'. e.g (0,2-4,6) + * Within group elem, '-' used for a range separator; + * ',' used for a single number. + */ +static int +eal_parse_set(const char *input, uint16_t set[], unsigned num) +{ + unsigned idx; + const char *str = input; + char *end = NULL; + unsigned min, max; + + memset(set, 0, num * sizeof(uint16_t)); + + while (isblank(*str)) + str++; + + /* only digit or left bracket is qualify for start point */ + if ((!isdigit(*str) && *str != '(') || *str == '\0') + return -1; + + /* process single number or single range of number */ + if (*str != '(') { + errno = 0; + idx = strtoul(str, &end, 10); + if (errno || end == NULL || idx >= num) + return -1; + else { + while (isblank(*end)) + end++; + + min = idx; + max = idx; + if (*end == '-') { + /* process single - */ + end++; + while (isblank(*end)) + end++; + if (!isdigit(*end)) + return -1; + + errno = 0; + idx = strtoul(end, &end, 10); + if (errno || end == NULL || idx >= num) + return -1; + max = idx; + while (isblank(*end)) + end++; + if (*end != ',' && *end != '\0') + return -1; + } + + if (*end != ',' && *end != '\0' && + *end != '@') + return -1; + + for (idx = RTE_MIN(min, max); + idx <= RTE_MAX(min, max); idx++) + set[idx] = 1; + + return end - input; + } + } + + /* process set within bracket */ + str++; + while (isblank(*str)) + str++; + if (*str == '\0') + return -1; + + min = RTE_MAX_LCORE; + do { + + /* go ahead to the first digit */ + while (isblank(*str)) + str++; + if (!isdigit(*str)) + return -1; + + /* get the digit value */ + errno = 0; + idx = strtoul(str, &end, 10); + if (errno || end == NULL || idx >= num) + return -1; + + /* go ahead to separator '-',',' and ')' */ + while (isblank(*end)) + end++; + if (*end == '-') { + if (min == RTE_MAX_LCORE) + min = idx; + else /* avoid continuous '-' */ + return -1; + } else if ((*end == ',') || (*end == ')')) { + max = idx; + if (min == RTE_MAX_LCORE) + min = idx; + for (idx = RTE_MIN(min, max); + idx <= RTE_MAX(min, max); idx++) + set[idx] = 1; + + min = RTE_MAX_LCORE; + } else + return -1; + + str = end + 1; + } while (*end != '\0' && *end != ')'); + + return str - input; +} + +/* convert from set array to cpuset bitmap */ +static int +convert_to_cpuset(rte_cpuset_t *cpusetp, + uint16_t *set, unsigned num) +{ + unsigned idx; + + CPU_ZERO(cpusetp); + + for (idx = 0; idx < num; idx++) { + if (!set[idx]) + continue; + + if (!lcore_config[idx].detected) { + RTE_LOG(ERR, EAL, "core %u " + "unavailable\n", idx); + return -1; + } + + CPU_SET(idx, cpusetp); + } + + return 0; +} + +/* + * The format pattern: --lcores='[<,lcores[@cpus]>...]' + * lcores, cpus could be a single digit/range or a group. + * '(' and ')' are necessary if it's a group. + * If not supply '@cpus', the value of cpus uses the same as lcores. + * e.g. '1,2@(5-7),(3-5)@(0,2),(0,6),7-8' means start 9 EAL thread as below + * lcore 0 runs on cpuset 0x41 (cpu 0,6) + * lcore 1 runs on cpuset 0x2 (cpu 1) + * lcore 2 runs on cpuset 0xe0 (cpu 5,6,7) + * lcore 3,4,5 runs on cpuset 0x5 (cpu 0,2) + * lcore 6 runs on cpuset 0x41 (cpu 0,6) + * lcore 7 runs on cpuset 0x80 (cpu 7) + * lcore 8 runs on cpuset 0x100 (cpu 8) + */ +static int +eal_parse_lcores(const char *lcores) +{ + struct rte_config *cfg = rte_eal_get_configuration(); + static uint16_t set[RTE_MAX_LCORE]; + unsigned idx = 0; + int i; + unsigned count = 0; + const char *lcore_start = NULL; + const char *end = NULL; + int offset; + rte_cpuset_t cpuset; + int lflags = 0; + int ret = -1; + + if (lcores == NULL) + return -1; + + /* Remove all blank characters ahead and after */ + while (isblank(*lcores)) + lcores++; + i = strlen(lcores); + while ((i > 0) && isblank(lcores[i - 1])) + i--; + + CPU_ZERO(&cpuset); + + /* Reset lcore config */ + for (idx = 0; idx < RTE_MAX_LCORE; idx++) { + cfg->lcore_role[idx] = ROLE_OFF; + lcore_config[idx].core_index = -1; + CPU_ZERO(&lcore_config[idx].cpuset); + } + + /* Get list of cores */ + do { + while (isblank(*lcores)) + lcores++; + if (*lcores == '\0') + goto err; + + /* record lcore_set start point */ + lcore_start = lcores; + + /* go across a complete bracket */ + if (*lcore_start == '(') { + lcores += strcspn(lcores, ")"); + if (*lcores++ == '\0') + goto err; + } + + /* scan the separator '@', ','(next) or '\0'(finish) */ + lcores += strcspn(lcores, "@,"); + + if (*lcores == '@') { + /* explicit assign cpu_set */ + offset = eal_parse_set(lcores + 1, set, RTE_DIM(set)); + if (offset < 0) + goto err; + + /* prepare cpu_set and update the end cursor */ + if (0 > convert_to_cpuset(&cpuset, + set, RTE_DIM(set))) + goto err; + end = lcores + 1 + offset; + } else { /* ',' or '\0' */ + /* haven't given cpu_set, current loop done */ + end = lcores; + + /* go back to check - */ + offset = strcspn(lcore_start, "(-"); + if (offset < (end - lcore_start) && + *(lcore_start + offset) != '(') + lflags = 1; + } + + if (*end != ',' && *end != '\0') + goto err; + + /* parse lcore_set from start point */ + if (0 > eal_parse_set(lcore_start, set, RTE_DIM(set))) + goto err; + + /* without '@', by default using lcore_set as cpu_set */ + if (*lcores != '@' && + 0 > convert_to_cpuset(&cpuset, set, RTE_DIM(set))) + goto err; + + /* start to update lcore_set */ + for (idx = 0; idx < RTE_MAX_LCORE; idx++) { + if (!set[idx]) + continue; + + if (cfg->lcore_role[idx] != ROLE_RTE) { + lcore_config[idx].core_index = count; + cfg->lcore_role[idx] = ROLE_RTE; + count++; + } + + if (lflags) { + CPU_ZERO(&cpuset); + CPU_SET(idx, &cpuset); + } + rte_memcpy(&lcore_config[idx].cpuset, &cpuset, + sizeof(rte_cpuset_t)); + } + + lcores = end + 1; + } while (*end != '\0'); + + if (count == 0) + goto err; + + cfg->lcore_count = count; + ret = 0; + +err: + + return ret; +} + +static int +eal_parse_syslog(const char *facility, struct internal_config *conf) +{ + int i; + static struct { + const char *name; + int value; + } map[] = { + { "auth", LOG_AUTH }, + { "cron", LOG_CRON }, + { "daemon", LOG_DAEMON }, + { "ftp", LOG_FTP }, + { "kern", LOG_KERN }, + { "lpr", LOG_LPR }, + { "mail", LOG_MAIL }, + { "news", LOG_NEWS }, + { "syslog", LOG_SYSLOG }, + { "user", LOG_USER }, + { "uucp", LOG_UUCP }, + { "local0", LOG_LOCAL0 }, + { "local1", LOG_LOCAL1 }, + { "local2", LOG_LOCAL2 }, + { "local3", LOG_LOCAL3 }, + { "local4", LOG_LOCAL4 }, + { "local5", LOG_LOCAL5 }, + { "local6", LOG_LOCAL6 }, + { "local7", LOG_LOCAL7 }, + { NULL, 0 } + }; + + for (i = 0; map[i].name; i++) { + if (!strcmp(facility, map[i].name)) { + conf->syslog_facility = map[i].value; + return 0; + } + } + return -1; +} + +static int +eal_parse_log_level(const char *level, uint32_t *log_level) +{ + char *end; + unsigned long tmp; + + errno = 0; + tmp = strtoul(level, &end, 0); + + /* check for errors */ + if ((errno != 0) || (level[0] == '\0') || + end == NULL || (*end != '\0')) + return -1; + + /* log_level is a uint32_t */ + if (tmp >= UINT32_MAX) + return -1; + + *log_level = tmp; + return 0; +} + +static enum rte_proc_type_t +eal_parse_proc_type(const char *arg) +{ + if (strncasecmp(arg, "primary", sizeof("primary")) == 0) + return RTE_PROC_PRIMARY; + if (strncasecmp(arg, "secondary", sizeof("secondary")) == 0) + return RTE_PROC_SECONDARY; + if (strncasecmp(arg, "auto", sizeof("auto")) == 0) + return RTE_PROC_AUTO; + + return RTE_PROC_INVALID; +} + +int +eal_parse_common_option(int opt, const char *optarg, + struct internal_config *conf) +{ + switch (opt) { + /* blacklist */ + case 'b': + if (rte_eal_devargs_add(RTE_DEVTYPE_BLACKLISTED_PCI, + optarg) < 0) { + return -1; + } + break; + /* whitelist */ + case 'w': + if (rte_eal_devargs_add(RTE_DEVTYPE_WHITELISTED_PCI, + optarg) < 0) { + return -1; + } + break; + /* coremask */ + case 'c': + if (eal_parse_coremask(optarg) < 0) { + RTE_LOG(ERR, EAL, "invalid coremask\n"); + return -1; + } + break; + /* corelist */ + case 'l': + if (eal_parse_corelist(optarg) < 0) { + RTE_LOG(ERR, EAL, "invalid core list\n"); + return -1; + } + break; + /* size of memory */ + case 'm': + conf->memory = atoi(optarg); + conf->memory *= 1024ULL; + conf->memory *= 1024ULL; + mem_parsed = 1; + break; + /* force number of channels */ + case 'n': + conf->force_nchannel = atoi(optarg); + if (conf->force_nchannel == 0) { + RTE_LOG(ERR, EAL, "invalid channel number\n"); + return -1; + } + break; + /* force number of ranks */ + case 'r': + conf->force_nrank = atoi(optarg); + if (conf->force_nrank == 0 || + conf->force_nrank > 16) { + RTE_LOG(ERR, EAL, "invalid rank number\n"); + return -1; + } + break; + /* force loading of external driver */ + case 'd': + if (eal_plugin_add(optarg) == -1) + return -1; + break; + case 'v': + /* since message is explicitly requested by user, we + * write message at highest log level so it can always + * be seen + * even if info or warning messages are disabled */ + RTE_LOG(CRIT, EAL, "RTE Version: '%s'\n", rte_version()); + break; + + /* long options */ + case OPT_HUGE_UNLINK_NUM: + conf->hugepage_unlink = 1; + break; + + case OPT_NO_HUGE_NUM: + conf->no_hugetlbfs = 1; + break; + + case OPT_NO_PCI_NUM: + conf->no_pci = 1; + break; + + case OPT_NO_HPET_NUM: + conf->no_hpet = 1; + break; + + case OPT_VMWARE_TSC_MAP_NUM: + conf->vmware_tsc_map = 1; + break; + + case OPT_NO_SHCONF_NUM: + conf->no_shconf = 1; + break; + + case OPT_PROC_TYPE_NUM: + conf->process_type = eal_parse_proc_type(optarg); + break; + + case OPT_MASTER_LCORE_NUM: + if (eal_parse_master_lcore(optarg) < 0) { + RTE_LOG(ERR, EAL, "invalid parameter for --" + OPT_MASTER_LCORE "\n"); + return -1; + } + break; + + case OPT_VDEV_NUM: + if (rte_eal_devargs_add(RTE_DEVTYPE_VIRTUAL, + optarg) < 0) { + return -1; + } + break; + + case OPT_SYSLOG_NUM: + if (eal_parse_syslog(optarg, conf) < 0) { + RTE_LOG(ERR, EAL, "invalid parameters for --" + OPT_SYSLOG "\n"); + return -1; + } + break; + + case OPT_LOG_LEVEL_NUM: { + uint32_t log; + + if (eal_parse_log_level(optarg, &log) < 0) { + RTE_LOG(ERR, EAL, + "invalid parameters for --" + OPT_LOG_LEVEL "\n"); + return -1; + } + conf->log_level = log; + break; + } + case OPT_LCORES_NUM: + if (eal_parse_lcores(optarg) < 0) { + RTE_LOG(ERR, EAL, "invalid parameter for --" + OPT_LCORES "\n"); + return -1; + } + break; + + /* don't know what to do, leave this to caller */ + default: + return 1; + + } + + return 0; +} + +int +eal_adjust_config(struct internal_config *internal_cfg) +{ + int i; + struct rte_config *cfg = rte_eal_get_configuration(); + + if (internal_config.process_type == RTE_PROC_AUTO) + internal_config.process_type = eal_proc_type_detect(); + + /* default master lcore is the first one */ + if (!master_lcore_parsed) + cfg->master_lcore = rte_get_next_lcore(-1, 0, 0); + + /* if no memory amounts were requested, this will result in 0 and + * will be overridden later, right after eal_hugepage_info_init() */ + for (i = 0; i < RTE_MAX_NUMA_NODES; i++) + internal_cfg->memory += internal_cfg->socket_mem[i]; + + return 0; +} + +int +eal_check_common_options(struct internal_config *internal_cfg) +{ + struct rte_config *cfg = rte_eal_get_configuration(); + + if (cfg->lcore_role[cfg->master_lcore] != ROLE_RTE) { + RTE_LOG(ERR, EAL, "Master lcore is not enabled for DPDK\n"); + return -1; + } + + if (internal_cfg->process_type == RTE_PROC_INVALID) { + RTE_LOG(ERR, EAL, "Invalid process type specified\n"); + return -1; + } + if (index(internal_cfg->hugefile_prefix, '%') != NULL) { + RTE_LOG(ERR, EAL, "Invalid char, '%%', in --"OPT_FILE_PREFIX" " + "option\n"); + return -1; + } + if (mem_parsed && internal_cfg->force_sockets == 1) { + RTE_LOG(ERR, EAL, "Options -m and --"OPT_SOCKET_MEM" cannot " + "be specified at the same time\n"); + return -1; + } + if (internal_cfg->no_hugetlbfs && internal_cfg->force_sockets == 1) { + RTE_LOG(ERR, EAL, "Option --"OPT_SOCKET_MEM" cannot " + "be specified together with --"OPT_NO_HUGE"\n"); + return -1; + } + + if (internal_cfg->no_hugetlbfs && internal_cfg->hugepage_unlink) { + RTE_LOG(ERR, EAL, "Option --"OPT_HUGE_UNLINK" cannot " + "be specified together with --"OPT_NO_HUGE"\n"); + return -1; + } + + if (rte_eal_devargs_type_count(RTE_DEVTYPE_WHITELISTED_PCI) != 0 && + rte_eal_devargs_type_count(RTE_DEVTYPE_BLACKLISTED_PCI) != 0) { + RTE_LOG(ERR, EAL, "Options blacklist (-b) and whitelist (-w) " + "cannot be used at the same time\n"); + return -1; + } + + return 0; +} + +void +eal_common_usage(void) +{ + printf("[options]\n\n" + "EAL common options:\n" + " -c COREMASK Hexadecimal bitmask of cores to run on\n" + " -l CORELIST List of cores to run on\n" + " The argument format is [-c2][,c3[-c4],...]\n" + " where c1, c2, etc are core indexes between 0 and %d\n" + " --"OPT_LCORES" COREMAP Map lcore set to physical cpu set\n" + " The argument format is\n" + " '[<,lcores[@cpus]>...]'\n" + " lcores and cpus list are grouped by '(' and ')'\n" + " Within the group, '-' is used for range separator,\n" + " ',' is used for single number separator.\n" + " '( )' can be omitted for single element group,\n" + " '@' can be omitted if cpus and lcores have the same value\n" + " --"OPT_MASTER_LCORE" ID Core ID that is used as master\n" + " -n CHANNELS Number of memory channels\n" + " -m MB Memory to allocate (see also --"OPT_SOCKET_MEM")\n" + " -r RANKS Force number of memory ranks (don't detect)\n" + " -b, --"OPT_PCI_BLACKLIST" Add a PCI device in black list.\n" + " Prevent EAL from using this PCI device. The argument\n" + " format is .\n" + " -w, --"OPT_PCI_WHITELIST" Add a PCI device in white list.\n" + " Only use the specified PCI devices. The argument format\n" + " is <[domain:]bus:devid.func>. This option can be present\n" + " several times (once per device).\n" + " [NOTE: PCI whitelist cannot be used with -b option]\n" + " --"OPT_VDEV" Add a virtual device.\n" + " The argument format is [,key=val,...]\n" + " (ex: --vdev=eth_pcap0,iface=eth2).\n" + " -d LIB.so|DIR Add a driver or driver directory\n" + " (can be used multiple times)\n" + " --"OPT_VMWARE_TSC_MAP" Use VMware TSC map instead of native RDTSC\n" + " --"OPT_PROC_TYPE" Type of this process (primary|secondary|auto)\n" + " --"OPT_SYSLOG" Set syslog facility\n" + " --"OPT_LOG_LEVEL" Set default log level\n" + " -v Display version information on startup\n" + " -h, --help This help\n" + "\nEAL options for DEBUG use only:\n" + " --"OPT_HUGE_UNLINK" Unlink hugepage files after init\n" + " --"OPT_NO_HUGE" Use malloc instead of hugetlbfs\n" + " --"OPT_NO_PCI" Disable PCI\n" + " --"OPT_NO_HPET" Disable HPET\n" + " --"OPT_NO_SHCONF" No shared config (mmap'd files)\n" + "\n", RTE_MAX_LCORE); +} diff --git a/lib/librte_eal/common/eal_common_pci.c b/lib/librte_eal/common/eal_common_pci.c new file mode 100644 index 00000000..40f49229 --- /dev/null +++ b/lib/librte_eal/common/eal_common_pci.c @@ -0,0 +1,457 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/* BSD LICENSE + * + * Copyright 2013-2014 6WIND S.A. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "eal_private.h" + +struct pci_driver_list pci_driver_list; +struct pci_device_list pci_device_list; + +static struct rte_devargs *pci_devargs_lookup(struct rte_pci_device *dev) +{ + struct rte_devargs *devargs; + + TAILQ_FOREACH(devargs, &devargs_list, next) { + if (devargs->type != RTE_DEVTYPE_BLACKLISTED_PCI && + devargs->type != RTE_DEVTYPE_WHITELISTED_PCI) + continue; + if (!rte_eal_compare_pci_addr(&dev->addr, &devargs->pci.addr)) + return devargs; + } + return NULL; +} + +/* map a particular resource from a file */ +void * +pci_map_resource(void *requested_addr, int fd, off_t offset, size_t size, + int additional_flags) +{ + void *mapaddr; + + /* Map the PCI memory resource of device */ + mapaddr = mmap(requested_addr, size, PROT_READ | PROT_WRITE, + MAP_SHARED | additional_flags, fd, offset); + if (mapaddr == MAP_FAILED) { + RTE_LOG(ERR, EAL, "%s(): cannot mmap(%d, %p, 0x%lx, 0x%lx): %s (%p)\n", + __func__, fd, requested_addr, + (unsigned long)size, (unsigned long)offset, + strerror(errno), mapaddr); + } else + RTE_LOG(DEBUG, EAL, " PCI memory mapped at %p\n", mapaddr); + + return mapaddr; +} + +/* unmap a particular resource */ +void +pci_unmap_resource(void *requested_addr, size_t size) +{ + if (requested_addr == NULL) + return; + + /* Unmap the PCI memory resource of device */ + if (munmap(requested_addr, size)) { + RTE_LOG(ERR, EAL, "%s(): cannot munmap(%p, 0x%lx): %s\n", + __func__, requested_addr, (unsigned long)size, + strerror(errno)); + } else + RTE_LOG(DEBUG, EAL, " PCI memory unmapped at %p\n", + requested_addr); +} + +/* + * If vendor/device ID match, call the devinit() function of the + * driver. + */ +static int +rte_eal_pci_probe_one_driver(struct rte_pci_driver *dr, struct rte_pci_device *dev) +{ + int ret; + const struct rte_pci_id *id_table; + + for (id_table = dr->id_table; id_table->vendor_id != 0; id_table++) { + + /* check if device's identifiers match the driver's ones */ + if (id_table->vendor_id != dev->id.vendor_id && + id_table->vendor_id != PCI_ANY_ID) + continue; + if (id_table->device_id != dev->id.device_id && + id_table->device_id != PCI_ANY_ID) + continue; + if (id_table->subsystem_vendor_id != dev->id.subsystem_vendor_id && + id_table->subsystem_vendor_id != PCI_ANY_ID) + continue; + if (id_table->subsystem_device_id != dev->id.subsystem_device_id && + id_table->subsystem_device_id != PCI_ANY_ID) + continue; + + struct rte_pci_addr *loc = &dev->addr; + + RTE_LOG(DEBUG, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n", + loc->domain, loc->bus, loc->devid, loc->function, + dev->numa_node); + + RTE_LOG(DEBUG, EAL, " probe driver: %x:%x %s\n", dev->id.vendor_id, + dev->id.device_id, dr->name); + + /* no initialization when blacklisted, return without error */ + if (dev->devargs != NULL && + dev->devargs->type == RTE_DEVTYPE_BLACKLISTED_PCI) { + RTE_LOG(DEBUG, EAL, " Device is blacklisted, not initializing\n"); + return 1; + } + + if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) { + /* map resources for devices that use igb_uio */ + ret = rte_eal_pci_map_device(dev); + if (ret != 0) + return ret; + } else if (dr->drv_flags & RTE_PCI_DRV_FORCE_UNBIND && + rte_eal_process_type() == RTE_PROC_PRIMARY) { + /* unbind current driver */ + if (pci_unbind_kernel_driver(dev) < 0) + return -1; + } + + /* reference driver structure */ + dev->driver = dr; + + /* call the driver devinit() function */ + return dr->devinit(dr, dev); + } + /* return positive value if driver doesn't support this device */ + return 1; +} + +/* + * If vendor/device ID match, call the devuninit() function of the + * driver. + */ +static int +rte_eal_pci_detach_dev(struct rte_pci_driver *dr, + struct rte_pci_device *dev) +{ + const struct rte_pci_id *id_table; + + if ((dr == NULL) || (dev == NULL)) + return -EINVAL; + + for (id_table = dr->id_table; id_table->vendor_id != 0; id_table++) { + + /* check if device's identifiers match the driver's ones */ + if (id_table->vendor_id != dev->id.vendor_id && + id_table->vendor_id != PCI_ANY_ID) + continue; + if (id_table->device_id != dev->id.device_id && + id_table->device_id != PCI_ANY_ID) + continue; + if (id_table->subsystem_vendor_id != dev->id.subsystem_vendor_id && + id_table->subsystem_vendor_id != PCI_ANY_ID) + continue; + if (id_table->subsystem_device_id != dev->id.subsystem_device_id && + id_table->subsystem_device_id != PCI_ANY_ID) + continue; + + struct rte_pci_addr *loc = &dev->addr; + + RTE_LOG(DEBUG, EAL, "PCI device "PCI_PRI_FMT" on NUMA socket %i\n", + loc->domain, loc->bus, loc->devid, + loc->function, dev->numa_node); + + RTE_LOG(DEBUG, EAL, " remove driver: %x:%x %s\n", dev->id.vendor_id, + dev->id.device_id, dr->name); + + if (dr->devuninit && (dr->devuninit(dev) < 0)) + return -1; /* negative value is an error */ + + /* clear driver structure */ + dev->driver = NULL; + + if (dr->drv_flags & RTE_PCI_DRV_NEED_MAPPING) + /* unmap resources for devices that use igb_uio */ + rte_eal_pci_unmap_device(dev); + + return 0; + } + + /* return positive value if driver doesn't support this device */ + return 1; +} + +/* + * If vendor/device ID match, call the devinit() function of all + * registered driver for the given device. Return -1 if initialization + * failed, return 1 if no driver is found for this device. + */ +static int +pci_probe_all_drivers(struct rte_pci_device *dev) +{ + struct rte_pci_driver *dr = NULL; + int rc = 0; + + if (dev == NULL) + return -1; + + TAILQ_FOREACH(dr, &pci_driver_list, next) { + rc = rte_eal_pci_probe_one_driver(dr, dev); + if (rc < 0) + /* negative value is an error */ + return -1; + if (rc > 0) + /* positive value means driver doesn't support it */ + continue; + return 0; + } + return 1; +} + +/* + * If vendor/device ID match, call the devuninit() function of all + * registered driver for the given device. Return -1 if initialization + * failed, return 1 if no driver is found for this device. + */ +static int +pci_detach_all_drivers(struct rte_pci_device *dev) +{ + struct rte_pci_driver *dr = NULL; + int rc = 0; + + if (dev == NULL) + return -1; + + TAILQ_FOREACH(dr, &pci_driver_list, next) { + rc = rte_eal_pci_detach_dev(dr, dev); + if (rc < 0) + /* negative value is an error */ + return -1; + if (rc > 0) + /* positive value means driver doesn't support it */ + continue; + return 0; + } + return 1; +} + +/* + * Find the pci device specified by pci address, then invoke probe function of + * the driver of the devive. + */ +int +rte_eal_pci_probe_one(const struct rte_pci_addr *addr) +{ + struct rte_pci_device *dev = NULL; + int ret = 0; + + if (addr == NULL) + return -1; + + TAILQ_FOREACH(dev, &pci_device_list, next) { + if (rte_eal_compare_pci_addr(&dev->addr, addr)) + continue; + + ret = pci_probe_all_drivers(dev); + if (ret < 0) + goto err_return; + return 0; + } + return -1; + +err_return: + RTE_LOG(WARNING, EAL, "Requested device " PCI_PRI_FMT + " cannot be used\n", dev->addr.domain, dev->addr.bus, + dev->addr.devid, dev->addr.function); + return -1; +} + +/* + * Detach device specified by its pci address. + */ +int +rte_eal_pci_detach(const struct rte_pci_addr *addr) +{ + struct rte_pci_device *dev = NULL; + int ret = 0; + + if (addr == NULL) + return -1; + + TAILQ_FOREACH(dev, &pci_device_list, next) { + if (rte_eal_compare_pci_addr(&dev->addr, addr)) + continue; + + ret = pci_detach_all_drivers(dev); + if (ret < 0) + goto err_return; + + TAILQ_REMOVE(&pci_device_list, dev, next); + return 0; + } + return -1; + +err_return: + RTE_LOG(WARNING, EAL, "Requested device " PCI_PRI_FMT + " cannot be used\n", dev->addr.domain, dev->addr.bus, + dev->addr.devid, dev->addr.function); + return -1; +} + +/* + * Scan the content of the PCI bus, and call the devinit() function for + * all registered drivers that have a matching entry in its id_table + * for discovered devices. + */ +int +rte_eal_pci_probe(void) +{ + struct rte_pci_device *dev = NULL; + struct rte_devargs *devargs; + int probe_all = 0; + int ret = 0; + + if (rte_eal_devargs_type_count(RTE_DEVTYPE_WHITELISTED_PCI) == 0) + probe_all = 1; + + TAILQ_FOREACH(dev, &pci_device_list, next) { + + /* set devargs in PCI structure */ + devargs = pci_devargs_lookup(dev); + if (devargs != NULL) + dev->devargs = devargs; + + /* probe all or only whitelisted devices */ + if (probe_all) + ret = pci_probe_all_drivers(dev); + else if (devargs != NULL && + devargs->type == RTE_DEVTYPE_WHITELISTED_PCI) + ret = pci_probe_all_drivers(dev); + if (ret < 0) + rte_exit(EXIT_FAILURE, "Requested device " PCI_PRI_FMT + " cannot be used\n", dev->addr.domain, dev->addr.bus, + dev->addr.devid, dev->addr.function); + } + + return 0; +} + +/* dump one device */ +static int +pci_dump_one_device(FILE *f, struct rte_pci_device *dev) +{ + int i; + + fprintf(f, PCI_PRI_FMT, dev->addr.domain, dev->addr.bus, + dev->addr.devid, dev->addr.function); + fprintf(f, " - vendor:%x device:%x\n", dev->id.vendor_id, + dev->id.device_id); + + for (i = 0; i != sizeof(dev->mem_resource) / + sizeof(dev->mem_resource[0]); i++) { + fprintf(f, " %16.16"PRIx64" %16.16"PRIx64"\n", + dev->mem_resource[i].phys_addr, + dev->mem_resource[i].len); + } + return 0; +} + +/* dump devices on the bus */ +void +rte_eal_pci_dump(FILE *f) +{ + struct rte_pci_device *dev = NULL; + + TAILQ_FOREACH(dev, &pci_device_list, next) { + pci_dump_one_device(f, dev); + } +} + +/* register a driver */ +void +rte_eal_pci_register(struct rte_pci_driver *driver) +{ + TAILQ_INSERT_TAIL(&pci_driver_list, driver, next); +} + +/* unregister a driver */ +void +rte_eal_pci_unregister(struct rte_pci_driver *driver) +{ + TAILQ_REMOVE(&pci_driver_list, driver, next); +} diff --git a/lib/librte_eal/common/eal_common_pci_uio.c b/lib/librte_eal/common/eal_common_pci_uio.c new file mode 100644 index 00000000..f062e81d --- /dev/null +++ b/lib/librte_eal/common/eal_common_pci_uio.c @@ -0,0 +1,222 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include "eal_private.h" + +static struct rte_tailq_elem rte_uio_tailq = { + .name = "UIO_RESOURCE_LIST", +}; +EAL_REGISTER_TAILQ(rte_uio_tailq) + +static int +pci_uio_map_secondary(struct rte_pci_device *dev) +{ + int fd, i; + struct mapped_pci_resource *uio_res; + struct mapped_pci_res_list *uio_res_list = + RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list); + + TAILQ_FOREACH(uio_res, uio_res_list, next) { + + /* skip this element if it doesn't match our PCI address */ + if (rte_eal_compare_pci_addr(&uio_res->pci_addr, &dev->addr)) + continue; + + for (i = 0; i != uio_res->nb_maps; i++) { + /* + * open devname, to mmap it + */ + fd = open(uio_res->maps[i].path, O_RDWR); + if (fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", + uio_res->maps[i].path, strerror(errno)); + return -1; + } + + void *mapaddr = pci_map_resource(uio_res->maps[i].addr, + fd, (off_t)uio_res->maps[i].offset, + (size_t)uio_res->maps[i].size, 0); + /* fd is not needed in slave process, close it */ + close(fd); + if (mapaddr != uio_res->maps[i].addr) { + RTE_LOG(ERR, EAL, + "Cannot mmap device resource file %s to address: %p\n", + uio_res->maps[i].path, + uio_res->maps[i].addr); + return -1; + } + } + return 0; + } + + RTE_LOG(ERR, EAL, "Cannot find resource for device\n"); + return 1; +} + +/* map the PCI resource of a PCI device in virtual memory */ +int +pci_uio_map_resource(struct rte_pci_device *dev) +{ + int i, map_idx = 0, ret; + uint64_t phaddr; + struct mapped_pci_resource *uio_res = NULL; + struct mapped_pci_res_list *uio_res_list = + RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list); + + dev->intr_handle.fd = -1; + dev->intr_handle.uio_cfg_fd = -1; + dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; + + /* secondary processes - use already recorded details */ + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return pci_uio_map_secondary(dev); + + /* allocate uio resource */ + ret = pci_uio_alloc_resource(dev, &uio_res); + if (ret) + return ret; + + /* Map all BARs */ + for (i = 0; i != PCI_MAX_RESOURCE; i++) { + /* skip empty BAR */ + phaddr = dev->mem_resource[i].phys_addr; + if (phaddr == 0) + continue; + + ret = pci_uio_map_resource_by_index(dev, i, + uio_res, map_idx); + if (ret) + goto error; + + map_idx++; + } + + uio_res->nb_maps = map_idx; + + TAILQ_INSERT_TAIL(uio_res_list, uio_res, next); + + return 0; +error: + for (i = 0; i < map_idx; i++) { + pci_unmap_resource(uio_res->maps[i].addr, + (size_t)uio_res->maps[i].size); + rte_free(uio_res->maps[i].path); + } + pci_uio_free_resource(dev, uio_res); + return -1; +} + +static void +pci_uio_unmap(struct mapped_pci_resource *uio_res) +{ + int i; + + if (uio_res == NULL) + return; + + for (i = 0; i != uio_res->nb_maps; i++) { + pci_unmap_resource(uio_res->maps[i].addr, + (size_t)uio_res->maps[i].size); + rte_free(uio_res->maps[i].path); + } +} + +static struct mapped_pci_resource * +pci_uio_find_resource(struct rte_pci_device *dev) +{ + struct mapped_pci_resource *uio_res; + struct mapped_pci_res_list *uio_res_list = + RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list); + + if (dev == NULL) + return NULL; + + TAILQ_FOREACH(uio_res, uio_res_list, next) { + + /* skip this element if it doesn't match our PCI address */ + if (!rte_eal_compare_pci_addr(&uio_res->pci_addr, &dev->addr)) + return uio_res; + } + return NULL; +} + +/* unmap the PCI resource of a PCI device in virtual memory */ +void +pci_uio_unmap_resource(struct rte_pci_device *dev) +{ + struct mapped_pci_resource *uio_res; + struct mapped_pci_res_list *uio_res_list = + RTE_TAILQ_CAST(rte_uio_tailq.head, mapped_pci_res_list); + + if (dev == NULL) + return; + + /* find an entry for the device */ + uio_res = pci_uio_find_resource(dev); + if (uio_res == NULL) + return; + + /* secondary processes - just free maps */ + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return pci_uio_unmap(uio_res); + + TAILQ_REMOVE(uio_res_list, uio_res, next); + + /* unmap all resources */ + pci_uio_unmap(uio_res); + + /* free uio resource */ + rte_free(uio_res); + + /* close fd if in primary process */ + close(dev->intr_handle.fd); + if (dev->intr_handle.uio_cfg_fd >= 0) { + close(dev->intr_handle.uio_cfg_fd); + dev->intr_handle.uio_cfg_fd = -1; + } + + dev->intr_handle.fd = -1; + dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; +} diff --git a/lib/librte_eal/common/eal_common_proc.c b/lib/librte_eal/common/eal_common_proc.c new file mode 100644 index 00000000..12e0fcac --- /dev/null +++ b/lib/librte_eal/common/eal_common_proc.c @@ -0,0 +1,61 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2016 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include + +#include "eal_filesystem.h" +#include "eal_internal_cfg.h" + +int +rte_eal_primary_proc_alive(const char *config_file_path) +{ + int config_fd; + + if (config_file_path) + config_fd = open(config_file_path, O_RDONLY); + else { + char default_path[PATH_MAX+1]; + snprintf(default_path, PATH_MAX, RUNTIME_CONFIG_FMT, + default_config_dir, "rte"); + config_fd = open(default_path, O_RDONLY); + } + if (config_fd < 0) + return 0; + + int ret = lockf(config_fd, F_TEST, 0); + close(config_fd); + + return !!ret; +} diff --git a/lib/librte_eal/common/eal_common_string_fns.c b/lib/librte_eal/common/eal_common_string_fns.c new file mode 100644 index 00000000..125a3e2d --- /dev/null +++ b/lib/librte_eal/common/eal_common_string_fns.c @@ -0,0 +1,69 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include + +#include + +/* split string into tokens */ +int +rte_strsplit(char *string, int stringlen, + char **tokens, int maxtokens, char delim) +{ + int i, tok = 0; + int tokstart = 1; /* first token is right at start of string */ + + if (string == NULL || tokens == NULL) + goto einval_error; + + for (i = 0; i < stringlen; i++) { + if (string[i] == '\0' || tok >= maxtokens) + break; + if (tokstart) { + tokstart = 0; + tokens[tok++] = &string[i]; + } + if (string[i] == delim) { + string[i] = '\0'; + tokstart = 1; + } + } + return tok; + +einval_error: + errno = EINVAL; + return -1; +} diff --git a/lib/librte_eal/common/eal_common_tailqs.c b/lib/librte_eal/common/eal_common_tailqs.c new file mode 100644 index 00000000..bb08ec8b --- /dev/null +++ b/lib/librte_eal/common/eal_common_tailqs.c @@ -0,0 +1,202 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "eal_private.h" + +TAILQ_HEAD(rte_tailq_elem_head, rte_tailq_elem); +/* local tailq list */ +static struct rte_tailq_elem_head rte_tailq_elem_head = + TAILQ_HEAD_INITIALIZER(rte_tailq_elem_head); + +/* number of tailqs registered, -1 before call to rte_eal_tailqs_init */ +static int rte_tailqs_count = -1; + +struct rte_tailq_head * +rte_eal_tailq_lookup(const char *name) +{ + unsigned i; + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + + if (name == NULL) + return NULL; + + for (i = 0; i < RTE_MAX_TAILQ; i++) { + if (!strncmp(name, mcfg->tailq_head[i].name, + RTE_TAILQ_NAMESIZE-1)) + return &mcfg->tailq_head[i]; + } + + return NULL; +} + +void +rte_dump_tailq(FILE *f) +{ + struct rte_mem_config *mcfg; + unsigned i = 0; + + mcfg = rte_eal_get_configuration()->mem_config; + + rte_rwlock_read_lock(&mcfg->qlock); + for (i = 0; i < RTE_MAX_TAILQ; i++) { + const struct rte_tailq_head *tailq = &mcfg->tailq_head[i]; + const struct rte_tailq_entry_head *head = &tailq->tailq_head; + + fprintf(f, "Tailq %u: qname:<%s>, tqh_first:%p, tqh_last:%p\n", + i, tailq->name, head->tqh_first, head->tqh_last); + } + rte_rwlock_read_unlock(&mcfg->qlock); +} + +static struct rte_tailq_head * +rte_eal_tailq_create(const char *name) +{ + struct rte_tailq_head *head = NULL; + + if (!rte_eal_tailq_lookup(name) && + (rte_tailqs_count + 1 < RTE_MAX_TAILQ)) { + struct rte_mem_config *mcfg; + + mcfg = rte_eal_get_configuration()->mem_config; + head = &mcfg->tailq_head[rte_tailqs_count]; + snprintf(head->name, sizeof(head->name) - 1, "%s", name); + TAILQ_INIT(&head->tailq_head); + rte_tailqs_count++; + } + + return head; +} + +/* local register, used to store "early" tailqs before rte_eal_init() and to + * ensure secondary process only registers tailqs once. */ +static int +rte_eal_tailq_local_register(struct rte_tailq_elem *t) +{ + struct rte_tailq_elem *temp; + + TAILQ_FOREACH(temp, &rte_tailq_elem_head, next) { + if (!strncmp(t->name, temp->name, sizeof(temp->name))) + return -1; + } + + TAILQ_INSERT_TAIL(&rte_tailq_elem_head, t, next); + return 0; +} + +static void +rte_eal_tailq_update(struct rte_tailq_elem *t) +{ + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + /* primary process is the only one that creates */ + t->head = rte_eal_tailq_create(t->name); + } else { + t->head = rte_eal_tailq_lookup(t->name); + } +} + +int +rte_eal_tailq_register(struct rte_tailq_elem *t) +{ + if (rte_eal_tailq_local_register(t) < 0) { + RTE_LOG(ERR, EAL, + "%s tailq is already registered\n", t->name); + goto error; + } + + /* if a register happens after rte_eal_tailqs_init(), then we can update + * tailq head */ + if (rte_tailqs_count >= 0) { + rte_eal_tailq_update(t); + if (t->head == NULL) { + RTE_LOG(ERR, EAL, + "Cannot initialize tailq: %s\n", t->name); + TAILQ_REMOVE(&rte_tailq_elem_head, t, next); + goto error; + } + } + + return 0; + +error: + t->head = NULL; + return -1; +} + +int +rte_eal_tailqs_init(void) +{ + struct rte_tailq_elem *t; + + rte_tailqs_count = 0; + + TAILQ_FOREACH(t, &rte_tailq_elem_head, next) { + /* second part of register job for "early" tailqs, see + * rte_eal_tailq_register and EAL_REGISTER_TAILQ */ + rte_eal_tailq_update(t); + if (t->head == NULL) { + RTE_LOG(ERR, EAL, + "Cannot initialize tailq: %s\n", t->name); + /* no need to TAILQ_REMOVE, we are going to panic in + * rte_eal_init() */ + goto fail; + } + } + + return 0; + +fail: + rte_dump_tailq(stderr); + return -1; +} diff --git a/lib/librte_eal/common/eal_common_thread.c b/lib/librte_eal/common/eal_common_thread.c new file mode 100644 index 00000000..2405e93f --- /dev/null +++ b/lib/librte_eal/common/eal_common_thread.c @@ -0,0 +1,157 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "eal_thread.h" + +RTE_DECLARE_PER_LCORE(unsigned , _socket_id); + +unsigned rte_socket_id(void) +{ + return RTE_PER_LCORE(_socket_id); +} + +int eal_cpuset_socket_id(rte_cpuset_t *cpusetp) +{ + unsigned cpu = 0; + int socket_id = SOCKET_ID_ANY; + int sid; + + if (cpusetp == NULL) + return SOCKET_ID_ANY; + + do { + if (!CPU_ISSET(cpu, cpusetp)) + continue; + + if (socket_id == SOCKET_ID_ANY) + socket_id = eal_cpu_socket_id(cpu); + + sid = eal_cpu_socket_id(cpu); + if (socket_id != sid) { + socket_id = SOCKET_ID_ANY; + break; + } + + } while (++cpu < RTE_MAX_LCORE); + + return socket_id; +} + +int +rte_thread_set_affinity(rte_cpuset_t *cpusetp) +{ + int s; + unsigned lcore_id; + pthread_t tid; + + tid = pthread_self(); + + s = pthread_setaffinity_np(tid, sizeof(rte_cpuset_t), cpusetp); + if (s != 0) { + RTE_LOG(ERR, EAL, "pthread_setaffinity_np failed\n"); + return -1; + } + + /* store socket_id in TLS for quick access */ + RTE_PER_LCORE(_socket_id) = + eal_cpuset_socket_id(cpusetp); + + /* store cpuset in TLS for quick access */ + memmove(&RTE_PER_LCORE(_cpuset), cpusetp, + sizeof(rte_cpuset_t)); + + lcore_id = rte_lcore_id(); + if (lcore_id != (unsigned)LCORE_ID_ANY) { + /* EAL thread will update lcore_config */ + lcore_config[lcore_id].socket_id = RTE_PER_LCORE(_socket_id); + memmove(&lcore_config[lcore_id].cpuset, cpusetp, + sizeof(rte_cpuset_t)); + } + + return 0; +} + +void +rte_thread_get_affinity(rte_cpuset_t *cpusetp) +{ + assert(cpusetp); + memmove(cpusetp, &RTE_PER_LCORE(_cpuset), + sizeof(rte_cpuset_t)); +} + +int +eal_thread_dump_affinity(char *str, unsigned size) +{ + rte_cpuset_t cpuset; + unsigned cpu; + int ret; + unsigned int out = 0; + + rte_thread_get_affinity(&cpuset); + + for (cpu = 0; cpu < RTE_MAX_LCORE; cpu++) { + if (!CPU_ISSET(cpu, &cpuset)) + continue; + + ret = snprintf(str + out, + size - out, "%u,", cpu); + if (ret < 0 || (unsigned)ret >= size - out) { + /* string will be truncated */ + ret = -1; + goto exit; + } + + out += ret; + } + + ret = 0; +exit: + /* remove the last separator */ + if (out > 0) + str[out - 1] = '\0'; + + return ret; +} diff --git a/lib/librte_eal/common/eal_common_timer.c b/lib/librte_eal/common/eal_common_timer.c new file mode 100644 index 00000000..c4227cd8 --- /dev/null +++ b/lib/librte_eal/common/eal_common_timer.c @@ -0,0 +1,86 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "eal_private.h" + +/* The frequency of the RDTSC timer resolution */ +static uint64_t eal_tsc_resolution_hz; + +void +rte_delay_us(unsigned us) +{ + const uint64_t start = rte_get_timer_cycles(); + const uint64_t ticks = (uint64_t)us * rte_get_timer_hz() / 1E6; + while ((rte_get_timer_cycles() - start) < ticks) + rte_pause(); +} + +uint64_t +rte_get_tsc_hz(void) +{ + return eal_tsc_resolution_hz; +} + +static uint64_t +estimate_tsc_freq(void) +{ + RTE_LOG(WARNING, EAL, "WARNING: TSC frequency estimated roughly" + " - clock timings may be less accurate.\n"); + /* assume that the sleep(1) will sleep for 1 second */ + uint64_t start = rte_rdtsc(); + sleep(1); + return rte_rdtsc() - start; +} + +void +set_tsc_freq(void) +{ + uint64_t freq = get_tsc_freq(); + + if (!freq) + freq = estimate_tsc_freq(); + + RTE_LOG(DEBUG, EAL, "TSC frequency is ~%" PRIu64 " KHz\n", freq / 1000); + eal_tsc_resolution_hz = freq; +} diff --git a/lib/librte_eal/common/eal_filesystem.h b/lib/librte_eal/common/eal_filesystem.h new file mode 100644 index 00000000..fdb4a70b --- /dev/null +++ b/lib/librte_eal/common/eal_filesystem.h @@ -0,0 +1,118 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * Stores functions and path defines for files and directories + * on the filesystem for Linux, that are used by the Linux EAL. + */ + +#ifndef EAL_FILESYSTEM_H +#define EAL_FILESYSTEM_H + +/** Path of rte config file. */ +#define RUNTIME_CONFIG_FMT "%s/.%s_config" + +#include +#include +#include +#include + +#include +#include "eal_internal_cfg.h" + +static const char *default_config_dir = "/var/run"; + +static inline const char * +eal_runtime_config_path(void) +{ + static char buffer[PATH_MAX]; /* static so auto-zeroed */ + const char *directory = default_config_dir; + const char *home_dir = getenv("HOME"); + + if (getuid() != 0 && home_dir != NULL) + directory = home_dir; + snprintf(buffer, sizeof(buffer) - 1, RUNTIME_CONFIG_FMT, directory, + internal_config.hugefile_prefix); + return buffer; +} + +/** Path of hugepage info file. */ +#define HUGEPAGE_INFO_FMT "%s/.%s_hugepage_info" + +static inline const char * +eal_hugepage_info_path(void) +{ + static char buffer[PATH_MAX]; /* static so auto-zeroed */ + const char *directory = default_config_dir; + const char *home_dir = getenv("HOME"); + + if (getuid() != 0 && home_dir != NULL) + directory = home_dir; + snprintf(buffer, sizeof(buffer) - 1, HUGEPAGE_INFO_FMT, directory, + internal_config.hugefile_prefix); + return buffer; +} + +/** String format for hugepage map files. */ +#define HUGEFILE_FMT "%s/%smap_%d" +#define TEMP_HUGEFILE_FMT "%s/%smap_temp_%d" + +static inline const char * +eal_get_hugefile_path(char *buffer, size_t buflen, const char *hugedir, int f_id) +{ + snprintf(buffer, buflen, HUGEFILE_FMT, hugedir, + internal_config.hugefile_prefix, f_id); + buffer[buflen - 1] = '\0'; + return buffer; +} + +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS +static inline const char * +eal_get_hugefile_temp_path(char *buffer, size_t buflen, const char *hugedir, int f_id) +{ + snprintf(buffer, buflen, TEMP_HUGEFILE_FMT, hugedir, + internal_config.hugefile_prefix, f_id); + buffer[buflen - 1] = '\0'; + return buffer; +} +#endif + +/** define the default filename prefix for the %s values above */ +#define HUGEFILE_PREFIX_DEFAULT "rte" + +/** Function to read a single numeric value from a file on the filesystem. + * Used to read information from files on /sys */ +int eal_parse_sysfs_value(const char *filename, unsigned long *val); + +#endif /* EAL_FILESYSTEM_H */ diff --git a/lib/librte_eal/common/eal_hugepages.h b/lib/librte_eal/common/eal_hugepages.h new file mode 100644 index 00000000..38edac03 --- /dev/null +++ b/lib/librte_eal/common/eal_hugepages.h @@ -0,0 +1,67 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef EAL_HUGEPAGES_H +#define EAL_HUGEPAGES_H + +#include +#include +#include + +#define MAX_HUGEPAGE_PATH PATH_MAX + +/** + * Structure used to store informations about hugepages that we mapped + * through the files in hugetlbfs. + */ +struct hugepage_file { + void *orig_va; /**< virtual addr of first mmap() */ + void *final_va; /**< virtual addr of 2nd mmap() */ + uint64_t physaddr; /**< physical addr */ + size_t size; /**< the page size */ + int socket_id; /**< NUMA socket ID */ + int file_id; /**< the '%d' in HUGEFILE_FMT */ + int memseg_id; /**< the memory segment to which page belongs */ +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + int repeated; /**< number of times the page size is repeated */ +#endif + char filepath[MAX_HUGEPAGE_PATH]; /**< path to backing file on filesystem */ +}; + +/** + * Read the information from linux on what hugepages are available + * for the EAL to use + */ +int eal_hugepage_info_init(void); + +#endif /* EAL_HUGEPAGES_H */ diff --git a/lib/librte_eal/common/eal_internal_cfg.h b/lib/librte_eal/common/eal_internal_cfg.h new file mode 100644 index 00000000..5f1367eb --- /dev/null +++ b/lib/librte_eal/common/eal_internal_cfg.h @@ -0,0 +1,94 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * Holds the structures for the eal internal configuration + */ + +#ifndef EAL_INTERNAL_CFG_H +#define EAL_INTERNAL_CFG_H + +#include +#include + +#define MAX_HUGEPAGE_SIZES 3 /**< support up to 3 page sizes */ + +/* + * internal configuration structure for the number, size and + * mount points of hugepages + */ +struct hugepage_info { + uint64_t hugepage_sz; /**< size of a huge page */ + const char *hugedir; /**< dir where hugetlbfs is mounted */ + uint32_t num_pages[RTE_MAX_NUMA_NODES]; + /**< number of hugepages of that size on each socket */ + int lock_descriptor; /**< file descriptor for hugepage dir */ +}; + +/** + * internal configuration + */ +struct internal_config { + volatile size_t memory; /**< amount of asked memory */ + volatile unsigned force_nchannel; /**< force number of channels */ + volatile unsigned force_nrank; /**< force number of ranks */ + volatile unsigned no_hugetlbfs; /**< true to disable hugetlbfs */ + unsigned hugepage_unlink; /**< true to unlink backing files */ + volatile unsigned xen_dom0_support; /**< support app running on Xen Dom0*/ + volatile unsigned no_pci; /**< true to disable PCI */ + volatile unsigned no_hpet; /**< true to disable HPET */ + volatile unsigned vmware_tsc_map; /**< true to use VMware TSC mapping + * instead of native TSC */ + volatile unsigned no_shconf; /**< true if there is no shared config */ + volatile unsigned create_uio_dev; /**< true to create /dev/uioX devices */ + volatile enum rte_proc_type_t process_type; /**< multi-process proc type */ + /** true to try allocating memory on specific sockets */ + volatile unsigned force_sockets; + volatile uint64_t socket_mem[RTE_MAX_NUMA_NODES]; /**< amount of memory per socket */ + uintptr_t base_virtaddr; /**< base address to try and reserve memory from */ + volatile int syslog_facility; /**< facility passed to openlog() */ + volatile uint32_t log_level; /**< default log level */ + /** default interrupt mode for VFIO */ + volatile enum rte_intr_mode vfio_intr_mode; + const char *hugefile_prefix; /**< the base filename of hugetlbfs files */ + const char *hugepage_dir; /**< specific hugetlbfs directory to use */ + + unsigned num_hugepage_sizes; /**< how many sizes on this system */ + struct hugepage_info hugepage_info[MAX_HUGEPAGE_SIZES]; +}; +extern struct internal_config internal_config; /**< Global EAL configuration. */ + +void eal_reset_internal_config(struct internal_config *internal_cfg); + +#endif /* EAL_INTERNAL_CFG_H */ diff --git a/lib/librte_eal/common/eal_options.h b/lib/librte_eal/common/eal_options.h new file mode 100644 index 00000000..a881c62e --- /dev/null +++ b/lib/librte_eal/common/eal_options.h @@ -0,0 +1,100 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2014 6WIND S.A. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef EAL_OPTIONS_H +#define EAL_OPTIONS_H + +enum { + /* long options mapped to a short option */ +#define OPT_HELP "help" + OPT_HELP_NUM = 'h', +#define OPT_PCI_BLACKLIST "pci-blacklist" + OPT_PCI_BLACKLIST_NUM = 'b', +#define OPT_PCI_WHITELIST "pci-whitelist" + OPT_PCI_WHITELIST_NUM = 'w', + + /* first long only option value must be >= 256, so that we won't + * conflict with short options */ + OPT_LONG_MIN_NUM = 256, +#define OPT_BASE_VIRTADDR "base-virtaddr" + OPT_BASE_VIRTADDR_NUM, +#define OPT_CREATE_UIO_DEV "create-uio-dev" + OPT_CREATE_UIO_DEV_NUM, +#define OPT_FILE_PREFIX "file-prefix" + OPT_FILE_PREFIX_NUM, +#define OPT_HUGE_DIR "huge-dir" + OPT_HUGE_DIR_NUM, +#define OPT_HUGE_UNLINK "huge-unlink" + OPT_HUGE_UNLINK_NUM, +#define OPT_LCORES "lcores" + OPT_LCORES_NUM, +#define OPT_LOG_LEVEL "log-level" + OPT_LOG_LEVEL_NUM, +#define OPT_MASTER_LCORE "master-lcore" + OPT_MASTER_LCORE_NUM, +#define OPT_PROC_TYPE "proc-type" + OPT_PROC_TYPE_NUM, +#define OPT_NO_HPET "no-hpet" + OPT_NO_HPET_NUM, +#define OPT_NO_HUGE "no-huge" + OPT_NO_HUGE_NUM, +#define OPT_NO_PCI "no-pci" + OPT_NO_PCI_NUM, +#define OPT_NO_SHCONF "no-shconf" + OPT_NO_SHCONF_NUM, +#define OPT_SOCKET_MEM "socket-mem" + OPT_SOCKET_MEM_NUM, +#define OPT_SYSLOG "syslog" + OPT_SYSLOG_NUM, +#define OPT_VDEV "vdev" + OPT_VDEV_NUM, +#define OPT_VFIO_INTR "vfio-intr" + OPT_VFIO_INTR_NUM, +#define OPT_VMWARE_TSC_MAP "vmware-tsc-map" + OPT_VMWARE_TSC_MAP_NUM, +#define OPT_XEN_DOM0 "xen-dom0" + OPT_XEN_DOM0_NUM, + OPT_LONG_MAX_NUM +}; + +extern const char eal_short_options[]; +extern const struct option eal_long_options[]; + +int eal_parse_common_option(int opt, const char *argv, + struct internal_config *conf); +int eal_adjust_config(struct internal_config *internal_cfg); +int eal_check_common_options(struct internal_config *internal_cfg); +void eal_common_usage(void); +enum rte_proc_type_t eal_proc_type_detect(void); +int eal_plugins_init(void); + +#endif /* EAL_OPTIONS_H */ diff --git a/lib/librte_eal/common/eal_private.h b/lib/librte_eal/common/eal_private.h new file mode 100644 index 00000000..2342fa16 --- /dev/null +++ b/lib/librte_eal/common/eal_private.h @@ -0,0 +1,331 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _EAL_PRIVATE_H_ +#define _EAL_PRIVATE_H_ + +#include +#include + +/** + * Initialize the memzone subsystem (private to eal). + * + * @return + * - 0 on success + * - Negative on error + */ +int rte_eal_memzone_init(void); + +/** + * Common log initialization function (private to eal). + * + * Called by environment-specific log initialization function to initialize + * log history. + * + * @param default_log + * The default log stream to be used. + * @return + * - 0 on success + * - Negative on error + */ +int rte_eal_common_log_init(FILE *default_log); + +/** + * Fill configuration with number of physical and logical processors + * + * This function is private to EAL. + * + * Parse /proc/cpuinfo to get the number of physical and logical + * processors on the machine. + * + * @return + * 0 on success, negative on error + */ +int rte_eal_cpu_init(void); + +/** + * Map memory + * + * This function is private to EAL. + * + * Fill configuration structure with these infos, and return 0 on success. + * + * @return + * 0 on success, negative on error + */ +int rte_eal_memory_init(void); + +/** + * Configure timers + * + * This function is private to EAL. + * + * Mmap memory areas used by HPET (high precision event timer) that will + * provide our time reference, and configure the TSC frequency also for it + * to be used as a reference. + * + * @return + * 0 on success, negative on error + */ +int rte_eal_timer_init(void); + +/** + * Init early logs + * + * This function is private to EAL. + * + * @return + * 0 on success, negative on error + */ +int rte_eal_log_early_init(void); + +/** + * Init the default log stream + * + * This function is private to EAL. + * + * @return + * 0 on success, negative on error + */ +int rte_eal_log_init(const char *id, int facility); + +/** + * Init the default log stream + * + * This function is private to EAL. + * + * @return + * 0 on success, negative on error + */ +int rte_eal_pci_init(void); + +#ifdef RTE_LIBRTE_IVSHMEM +/** + * Init the memory from IVSHMEM devices + * + * This function is private to EAL. + * + * @return + * 0 on success, negative on error + */ +int rte_eal_ivshmem_init(void); + +/** + * Init objects in IVSHMEM devices + * + * This function is private to EAL. + * + * @return + * 0 on success, negative on error + */ +int rte_eal_ivshmem_obj_init(void); +#endif + +struct rte_pci_driver; +struct rte_pci_device; + +/** + * Unbind kernel driver for this device + * + * This function is private to EAL. + * + * @return + * 0 on success, negative on error + */ +int pci_unbind_kernel_driver(struct rte_pci_device *dev); + +/** + * Map the PCI resource of a PCI device in virtual memory + * + * This function is private to EAL. + * + * @return + * 0 on success, negative on error + */ +int pci_uio_map_resource(struct rte_pci_device *dev); + +/** + * Unmap the PCI resource of a PCI device + * + * This function is private to EAL. + */ +void pci_uio_unmap_resource(struct rte_pci_device *dev); + +/** + * Allocate uio resource for PCI device + * + * This function is private to EAL. + * + * @param dev + * PCI device to allocate uio resource + * @param uio_res + * Pointer to uio resource. + * If the function returns 0, the pointer will be filled. + * @return + * 0 on success, negative on error + */ +int pci_uio_alloc_resource(struct rte_pci_device *dev, + struct mapped_pci_resource **uio_res); + +/** + * Free uio resource for PCI device + * + * This function is private to EAL. + * + * @param dev + * PCI device to free uio resource + * @param uio_res + * Pointer to uio resource. + */ +void pci_uio_free_resource(struct rte_pci_device *dev, + struct mapped_pci_resource *uio_res); + +/** + * Map device memory to uio resource + * + * This function is private to EAL. + * + * @param dev + * PCI device that has memory information. + * @param res_idx + * Memory resource index of the PCI device. + * @param uio_res + * uio resource that will keep mapping information. + * @param map_idx + * Mapping information index of the uio resource. + * @return + * 0 on success, negative on error + */ +int pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx, + struct mapped_pci_resource *uio_res, int map_idx); + +/** + * Init tail queues for non-EAL library structures. This is to allow + * the rings, mempools, etc. lists to be shared among multiple processes + * + * This function is private to EAL + * + * @return + * 0 on success, negative on error + */ +int rte_eal_tailqs_init(void); + +/** + * Init interrupt handling. + * + * This function is private to EAL. + * + * @return + * 0 on success, negative on error + */ +int rte_eal_intr_init(void); + +/** + * Init alarm mechanism. This is to allow a callback be called after + * specific time. + * + * This function is private to EAL. + * + * @return + * 0 on success, negative on error + */ +int rte_eal_alarm_init(void); + +/** + * This function initialises any virtual devices + * + * This function is private to the EAL. + */ +int rte_eal_dev_init(void); + +/** + * Function is to check if the kernel module(like, vfio, vfio_iommu_type1, + * etc.) loaded. + * + * @param module_name + * The module's name which need to be checked + * + * @return + * -1 means some error happens(NULL pointer or open failure) + * 0 means the module not loaded + * 1 means the module loaded + */ +int rte_eal_check_module(const char *module_name); + +/** + * Get cpu core_id. + * + * This function is private to the EAL. + */ +unsigned eal_cpu_core_id(unsigned lcore_id); + +/** + * Check if cpu is present. + * + * This function is private to the EAL. + */ +int eal_cpu_detected(unsigned lcore_id); + +/** + * Set TSC frequency from precise value or estimation + * + * This function is private to the EAL. + */ +void set_tsc_freq(void); + +/** + * Get precise TSC frequency from system + * + * This function is private to the EAL. + */ +uint64_t get_tsc_freq(void); + +/** + * Prepare physical memory mapping + * i.e. hugepages on Linux and + * contigmem on BSD. + * + * This function is private to the EAL. + */ +int rte_eal_hugepage_init(void); + +/** + * Creates memory mapping in secondary process + * i.e. hugepages on Linux and + * contigmem on BSD. + * + * This function is private to the EAL. + */ +int rte_eal_hugepage_attach(void); + +#endif /* _EAL_PRIVATE_H_ */ diff --git a/lib/librte_eal/common/eal_thread.h b/lib/librte_eal/common/eal_thread.h new file mode 100644 index 00000000..e4e76b9d --- /dev/null +++ b/lib/librte_eal/common/eal_thread.h @@ -0,0 +1,100 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef EAL_THREAD_H +#define EAL_THREAD_H + +#include + +/** + * basic loop of thread, called for each thread by eal_init(). + * + * @param arg + * opaque pointer + */ +__attribute__((noreturn)) void *eal_thread_loop(void *arg); + +/** + * Init per-lcore info for master thread + * + * @param lcore_id + * identifier of master lcore + */ +void eal_thread_init_master(unsigned lcore_id); + +/** + * Get the NUMA socket id from cpu id. + * This function is private to EAL. + * + * @param cpu_id + * The logical process id. + * @return + * socket_id or SOCKET_ID_ANY + */ +unsigned eal_cpu_socket_id(unsigned cpu_id); + +/** + * Get the NUMA socket id from cpuset. + * This function is private to EAL. + * + * @param cpusetp + * The point to a valid cpu set. + * @return + * socket_id or SOCKET_ID_ANY + */ +int eal_cpuset_socket_id(rte_cpuset_t *cpusetp); + +/** + * Default buffer size to use with eal_thread_dump_affinity() + */ +#define RTE_CPU_AFFINITY_STR_LEN 256 + +/** + * Dump the current pthread cpuset. + * This function is private to EAL. + * + * Note: + * If the dump size is greater than the size of given buffer, + * the string will be truncated and with '\0' at the end. + * + * @param str + * The string buffer the cpuset will dump to. + * @param size + * The string buffer size. + * @return + * 0 for success, -1 if truncation happens. + */ +int +eal_thread_dump_affinity(char *str, unsigned size); + +#endif /* EAL_THREAD_H */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_atomic.h b/lib/librte_eal/common/include/arch/arm/rte_atomic.h new file mode 100644 index 00000000..454a12b0 --- /dev/null +++ b/lib/librte_eal/common/include/arch/arm/rte_atomic.h @@ -0,0 +1,48 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 RehiveTech. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of RehiveTech nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_ATOMIC_ARM_H_ +#define _RTE_ATOMIC_ARM_H_ + +#ifdef RTE_ARCH_64 +#include +#else +#include +#endif + +#define rte_smp_mb() rte_mb() + +#define rte_smp_wmb() rte_wmb() + +#define rte_smp_rmb() rte_rmb() + +#endif /* _RTE_ATOMIC_ARM_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_atomic_32.h b/lib/librte_eal/common/include/arch/arm/rte_atomic_32.h new file mode 100644 index 00000000..9ae1e78b --- /dev/null +++ b/lib/librte_eal/common/include/arch/arm/rte_atomic_32.h @@ -0,0 +1,74 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 RehiveTech. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of RehiveTech nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_ATOMIC_ARM32_H_ +#define _RTE_ATOMIC_ARM32_H_ + +#ifndef RTE_FORCE_INTRINSICS +# error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_atomic.h" + +/** + * General memory barrier. + * + * Guarantees that the LOAD and STORE operations generated before the + * barrier occur before the LOAD and STORE operations generated after. + */ +#define rte_mb() __sync_synchronize() + +/** + * Write memory barrier. + * + * Guarantees that the STORE operations generated before the barrier + * occur before the STORE operations generated after. + */ +#define rte_wmb() do { asm volatile ("dmb st" : : : "memory"); } while (0) + +/** + * Read memory barrier. + * + * Guarantees that the LOAD operations generated before the barrier + * occur before the LOAD operations generated after. + */ +#define rte_rmb() __sync_synchronize() + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_ATOMIC_ARM32_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h b/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h new file mode 100644 index 00000000..671caa76 --- /dev/null +++ b/lib/librte_eal/common/include/arch/arm/rte_atomic_64.h @@ -0,0 +1,88 @@ +/* + * BSD LICENSE + * + * Copyright (C) Cavium networks Ltd. 2015. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium networks nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _RTE_ATOMIC_ARM64_H_ +#define _RTE_ATOMIC_ARM64_H_ + +#ifndef RTE_FORCE_INTRINSICS +# error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_atomic.h" + +#define dmb(opt) do { asm volatile("dmb " #opt : : : "memory"); } while (0) + +/** + * General memory barrier. + * + * Guarantees that the LOAD and STORE operations generated before the + * barrier occur before the LOAD and STORE operations generated after. + * This function is architecture dependent. + */ +static inline void rte_mb(void) +{ + dmb(ish); +} + +/** + * Write memory barrier. + * + * Guarantees that the STORE operations generated before the barrier + * occur before the STORE operations generated after. + * This function is architecture dependent. + */ +static inline void rte_wmb(void) +{ + dmb(ishst); +} + +/** + * Read memory barrier. + * + * Guarantees that the LOAD operations generated before the barrier + * occur before the LOAD operations generated after. + * This function is architecture dependent. + */ +static inline void rte_rmb(void) +{ + dmb(ishld); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_ATOMIC_ARM64_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_byteorder.h b/lib/librte_eal/common/include/arch/arm/rte_byteorder.h new file mode 100644 index 00000000..3f2dd1f2 --- /dev/null +++ b/lib/librte_eal/common/include/arch/arm/rte_byteorder.h @@ -0,0 +1,107 @@ +/* + * BSD LICENSE + * + * Copyright(c) 2015 RehiveTech. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of RehiveTech nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_BYTEORDER_ARM_H_ +#define _RTE_BYTEORDER_ARM_H_ + +#ifndef RTE_FORCE_INTRINSICS +# error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_byteorder.h" + +/* fix missing __builtin_bswap16 for gcc older then 4.8 */ +#if !(__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) + +static inline uint16_t rte_arch_bswap16(uint16_t _x) +{ + register uint16_t x = _x; + + asm volatile ("rev16 %0,%1" + : "=r" (x) + : "r" (x) + ); + return x; +} + +#define rte_bswap16(x) ((uint16_t)(__builtin_constant_p(x) ? \ + rte_constant_bswap16(x) : \ + rte_arch_bswap16(x))) +#endif + +/* ARM architecture is bi-endian (both big and little). */ +#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN + +#define rte_cpu_to_le_16(x) (x) +#define rte_cpu_to_le_32(x) (x) +#define rte_cpu_to_le_64(x) (x) + +#define rte_cpu_to_be_16(x) rte_bswap16(x) +#define rte_cpu_to_be_32(x) rte_bswap32(x) +#define rte_cpu_to_be_64(x) rte_bswap64(x) + +#define rte_le_to_cpu_16(x) (x) +#define rte_le_to_cpu_32(x) (x) +#define rte_le_to_cpu_64(x) (x) + +#define rte_be_to_cpu_16(x) rte_bswap16(x) +#define rte_be_to_cpu_32(x) rte_bswap32(x) +#define rte_be_to_cpu_64(x) rte_bswap64(x) + +#else /* RTE_BIG_ENDIAN */ + +#define rte_cpu_to_le_16(x) rte_bswap16(x) +#define rte_cpu_to_le_32(x) rte_bswap32(x) +#define rte_cpu_to_le_64(x) rte_bswap64(x) + +#define rte_cpu_to_be_16(x) (x) +#define rte_cpu_to_be_32(x) (x) +#define rte_cpu_to_be_64(x) (x) + +#define rte_le_to_cpu_16(x) rte_bswap16(x) +#define rte_le_to_cpu_32(x) rte_bswap32(x) +#define rte_le_to_cpu_64(x) rte_bswap64(x) + +#define rte_be_to_cpu_16(x) (x) +#define rte_be_to_cpu_32(x) (x) +#define rte_be_to_cpu_64(x) (x) +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_BYTEORDER_ARM_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_cpuflags.h b/lib/librte_eal/common/include/arch/arm/rte_cpuflags.h new file mode 100644 index 00000000..b8f62889 --- /dev/null +++ b/lib/librte_eal/common/include/arch/arm/rte_cpuflags.h @@ -0,0 +1,42 @@ +/* + * BSD LICENSE + * + * Copyright(c) 2015 RehiveTech. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of RehiveTech nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_CPUFLAGS_ARM_H_ +#define _RTE_CPUFLAGS_ARM_H_ + +#ifdef RTE_ARCH_64 +#include +#else +#include +#endif + +#endif /* _RTE_CPUFLAGS_ARM_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_cpuflags_32.h b/lib/librte_eal/common/include/arch/arm/rte_cpuflags_32.h new file mode 100644 index 00000000..eb02d9b9 --- /dev/null +++ b/lib/librte_eal/common/include/arch/arm/rte_cpuflags_32.h @@ -0,0 +1,82 @@ +/* + * BSD LICENSE + * + * Copyright(c) 2015 RehiveTech. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of RehiveTech nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_CPUFLAGS_ARM32_H_ +#define _RTE_CPUFLAGS_ARM32_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Enumeration of all CPU features supported + */ +enum rte_cpu_flag_t { + RTE_CPUFLAG_SWP = 0, + RTE_CPUFLAG_HALF, + RTE_CPUFLAG_THUMB, + RTE_CPUFLAG_A26BIT, + RTE_CPUFLAG_FAST_MULT, + RTE_CPUFLAG_FPA, + RTE_CPUFLAG_VFP, + RTE_CPUFLAG_EDSP, + RTE_CPUFLAG_JAVA, + RTE_CPUFLAG_IWMMXT, + RTE_CPUFLAG_CRUNCH, + RTE_CPUFLAG_THUMBEE, + RTE_CPUFLAG_NEON, + RTE_CPUFLAG_VFPv3, + RTE_CPUFLAG_VFPv3D16, + RTE_CPUFLAG_TLS, + RTE_CPUFLAG_VFPv4, + RTE_CPUFLAG_IDIVA, + RTE_CPUFLAG_IDIVT, + RTE_CPUFLAG_VFPD32, + RTE_CPUFLAG_LPAE, + RTE_CPUFLAG_EVTSTRM, + RTE_CPUFLAG_AES, + RTE_CPUFLAG_PMULL, + RTE_CPUFLAG_SHA1, + RTE_CPUFLAG_SHA2, + RTE_CPUFLAG_CRC32, + RTE_CPUFLAG_V7L, + /* The last item */ + RTE_CPUFLAG_NUMFLAGS,/**< This should always be the last! */ +}; + +#include "generic/rte_cpuflags.h" + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_CPUFLAGS_ARM32_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_cpuflags_64.h b/lib/librte_eal/common/include/arch/arm/rte_cpuflags_64.h new file mode 100644 index 00000000..49aead92 --- /dev/null +++ b/lib/librte_eal/common/include/arch/arm/rte_cpuflags_64.h @@ -0,0 +1,64 @@ +/* + * BSD LICENSE + * + * Copyright (C) Cavium networks Ltd. 2015. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium networks nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_CPUFLAGS_ARM64_H_ +#define _RTE_CPUFLAGS_ARM64_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Enumeration of all CPU features supported + */ +enum rte_cpu_flag_t { + RTE_CPUFLAG_FP = 0, + RTE_CPUFLAG_NEON, + RTE_CPUFLAG_EVTSTRM, + RTE_CPUFLAG_AES, + RTE_CPUFLAG_PMULL, + RTE_CPUFLAG_SHA1, + RTE_CPUFLAG_SHA2, + RTE_CPUFLAG_CRC32, + RTE_CPUFLAG_ATOMICS, + RTE_CPUFLAG_AARCH64, + /* The last item */ + RTE_CPUFLAG_NUMFLAGS,/**< This should always be the last! */ +}; + +#include "generic/rte_cpuflags.h" + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_CPUFLAGS_ARM64_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_cycles.h b/lib/librte_eal/common/include/arch/arm/rte_cycles.h new file mode 100644 index 00000000..a8009a06 --- /dev/null +++ b/lib/librte_eal/common/include/arch/arm/rte_cycles.h @@ -0,0 +1,42 @@ +/* + * BSD LICENSE + * + * Copyright(c) 2015 RehiveTech. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of RehiveTech nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_CYCLES_ARM_H_ +#define _RTE_CYCLES_ARM_H_ + +#ifdef RTE_ARCH_64 +#include +#else +#include +#endif + +#endif /* _RTE_CYCLES_ARM_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_cycles_32.h b/lib/librte_eal/common/include/arch/arm/rte_cycles_32.h new file mode 100644 index 00000000..9c1be71e --- /dev/null +++ b/lib/librte_eal/common/include/arch/arm/rte_cycles_32.h @@ -0,0 +1,121 @@ +/* + * BSD LICENSE + * + * Copyright(c) 2015 RehiveTech. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of RehiveTech nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_CYCLES_ARM32_H_ +#define _RTE_CYCLES_ARM32_H_ + +/* ARM v7 does not have suitable source of clock signals. The only clock counter + available in the core is 32 bit wide. Therefore it is unsuitable as the + counter overlaps every few seconds and probably is not accessible by + userspace programs. Therefore we use clock_gettime(CLOCK_MONOTONIC_RAW) to + simulate counter running at 1GHz. +*/ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_cycles.h" + +/** + * Read the time base register. + * + * @return + * The time base for this lcore. + */ +#ifndef RTE_ARM_EAL_RDTSC_USE_PMU + +/** + * This call is easily portable to any ARM architecture, however, + * it may be damn slow and inprecise for some tasks. + */ +static inline uint64_t +__rte_rdtsc_syscall(void) +{ + struct timespec val; + uint64_t v; + + while (clock_gettime(CLOCK_MONOTONIC_RAW, &val) != 0) + /* no body */; + + v = (uint64_t) val.tv_sec * 1000000000LL; + v += (uint64_t) val.tv_nsec; + return v; +} +#define rte_rdtsc __rte_rdtsc_syscall + +#else + +/** + * This function requires to configure the PMCCNTR and enable + * userspace access to it: + * + * asm volatile("mcr p15, 0, %0, c9, c14, 0" : : "r"(1)); + * asm volatile("mcr p15, 0, %0, c9, c12, 0" : : "r"(29)); + * asm volatile("mcr p15, 0, %0, c9, c12, 1" : : "r"(0x8000000f)); + * + * which is possible only from the priviledged mode (kernel space). + */ +static inline uint64_t +__rte_rdtsc_pmccntr(void) +{ + unsigned tsc; + uint64_t final_tsc; + + /* Read PMCCNTR */ + asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(tsc)); + /* 1 tick = 64 clocks */ + final_tsc = ((uint64_t)tsc) << 6; + + return (uint64_t)final_tsc; +} +#define rte_rdtsc __rte_rdtsc_pmccntr + +#endif /* RTE_ARM_EAL_RDTSC_USE_PMU */ + +static inline uint64_t +rte_rdtsc_precise(void) +{ + rte_mb(); + return rte_rdtsc(); +} + +static inline uint64_t +rte_get_tsc_cycles(void) { return rte_rdtsc(); } + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_CYCLES_ARM32_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h b/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h new file mode 100644 index 00000000..14f26120 --- /dev/null +++ b/lib/librte_eal/common/include/arch/arm/rte_cycles_64.h @@ -0,0 +1,71 @@ +/* + * BSD LICENSE + * + * Copyright (C) Cavium networks Ltd. 2015. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium networks nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _RTE_CYCLES_ARM64_H_ +#define _RTE_CYCLES_ARM64_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_cycles.h" + +/** + * Read the time base register. + * + * @return + * The time base for this lcore. + */ +static inline uint64_t +rte_rdtsc(void) +{ + uint64_t tsc; + + asm volatile("mrs %0, cntvct_el0" : "=r" (tsc)); + return tsc; +} + +static inline uint64_t +rte_rdtsc_precise(void) +{ + rte_mb(); + return rte_rdtsc(); +} + +static inline uint64_t +rte_get_tsc_cycles(void) { return rte_rdtsc(); } + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_CYCLES_ARM64_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_memcpy.h b/lib/librte_eal/common/include/arch/arm/rte_memcpy.h new file mode 100644 index 00000000..1d562c3f --- /dev/null +++ b/lib/librte_eal/common/include/arch/arm/rte_memcpy.h @@ -0,0 +1,42 @@ +/* + * BSD LICENSE + * + * Copyright(c) 2015 RehiveTech. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of RehiveTech nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_MEMCPY_ARM_H_ +#define _RTE_MEMCPY_ARM_H_ + +#ifdef RTE_ARCH_64 +#include +#else +#include +#endif + +#endif /* _RTE_MEMCPY_ARM_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h b/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h new file mode 100644 index 00000000..988125b3 --- /dev/null +++ b/lib/librte_eal/common/include/arch/arm/rte_memcpy_32.h @@ -0,0 +1,338 @@ +/* + * BSD LICENSE + * + * Copyright(c) 2015 RehiveTech. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of RehiveTech nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_MEMCPY_ARM32_H_ +#define _RTE_MEMCPY_ARM32_H_ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_memcpy.h" + +#ifdef RTE_ARCH_ARM_NEON_MEMCPY + +#ifndef RTE_MACHINE_CPUFLAG_NEON +#error "Cannot optimize memcpy by NEON as the CPU seems to not support this" +#endif + +/* ARM NEON Intrinsics are used to copy data */ +#include + +static inline void +rte_mov16(uint8_t *dst, const uint8_t *src) +{ + vst1q_u8(dst, vld1q_u8(src)); +} + +static inline void +rte_mov32(uint8_t *dst, const uint8_t *src) +{ + asm volatile ( + "vld1.8 {d0-d3}, [%0]\n\t" + "vst1.8 {d0-d3}, [%1]\n\t" + : "+r" (src), "+r" (dst) + : : "memory", "d0", "d1", "d2", "d3"); +} + +static inline void +rte_mov48(uint8_t *dst, const uint8_t *src) +{ + asm volatile ( + "vld1.8 {d0-d3}, [%0]!\n\t" + "vld1.8 {d4-d5}, [%0]\n\t" + "vst1.8 {d0-d3}, [%1]!\n\t" + "vst1.8 {d4-d5}, [%1]\n\t" + : "+r" (src), "+r" (dst) + : + : "memory", "d0", "d1", "d2", "d3", "d4", "d5"); +} + +static inline void +rte_mov64(uint8_t *dst, const uint8_t *src) +{ + asm volatile ( + "vld1.8 {d0-d3}, [%0]!\n\t" + "vld1.8 {d4-d7}, [%0]\n\t" + "vst1.8 {d0-d3}, [%1]!\n\t" + "vst1.8 {d4-d7}, [%1]\n\t" + : "+r" (src), "+r" (dst) + : + : "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7"); +} + +static inline void +rte_mov128(uint8_t *dst, const uint8_t *src) +{ + asm volatile ("pld [%0, #64]" : : "r" (src)); + asm volatile ( + "vld1.8 {d0-d3}, [%0]!\n\t" + "vld1.8 {d4-d7}, [%0]!\n\t" + "vld1.8 {d8-d11}, [%0]!\n\t" + "vld1.8 {d12-d15}, [%0]\n\t" + "vst1.8 {d0-d3}, [%1]!\n\t" + "vst1.8 {d4-d7}, [%1]!\n\t" + "vst1.8 {d8-d11}, [%1]!\n\t" + "vst1.8 {d12-d15}, [%1]\n\t" + : "+r" (src), "+r" (dst) + : + : "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", + "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15"); +} + +static inline void +rte_mov256(uint8_t *dst, const uint8_t *src) +{ + asm volatile ("pld [%0, #64]" : : "r" (src)); + asm volatile ("pld [%0, #128]" : : "r" (src)); + asm volatile ("pld [%0, #192]" : : "r" (src)); + asm volatile ("pld [%0, #256]" : : "r" (src)); + asm volatile ("pld [%0, #320]" : : "r" (src)); + asm volatile ("pld [%0, #384]" : : "r" (src)); + asm volatile ("pld [%0, #448]" : : "r" (src)); + asm volatile ( + "vld1.8 {d0-d3}, [%0]!\n\t" + "vld1.8 {d4-d7}, [%0]!\n\t" + "vld1.8 {d8-d11}, [%0]!\n\t" + "vld1.8 {d12-d15}, [%0]!\n\t" + "vld1.8 {d16-d19}, [%0]!\n\t" + "vld1.8 {d20-d23}, [%0]!\n\t" + "vld1.8 {d24-d27}, [%0]!\n\t" + "vld1.8 {d28-d31}, [%0]\n\t" + "vst1.8 {d0-d3}, [%1]!\n\t" + "vst1.8 {d4-d7}, [%1]!\n\t" + "vst1.8 {d8-d11}, [%1]!\n\t" + "vst1.8 {d12-d15}, [%1]!\n\t" + "vst1.8 {d16-d19}, [%1]!\n\t" + "vst1.8 {d20-d23}, [%1]!\n\t" + "vst1.8 {d24-d27}, [%1]!\n\t" + "vst1.8 {d28-d31}, [%1]!\n\t" + : "+r" (src), "+r" (dst) + : + : "memory", "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", + "d8", "d9", "d10", "d11", "d12", "d13", "d14", "d15", + "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", + "d24", "d25", "d26", "d27", "d28", "d29", "d30", "d31"); +} + +#define rte_memcpy(dst, src, n) \ + ({ (__builtin_constant_p(n)) ? \ + memcpy((dst), (src), (n)) : \ + rte_memcpy_func((dst), (src), (n)); }) + +static inline void * +rte_memcpy_func(void *dst, const void *src, size_t n) +{ + void *ret = dst; + + /* We can't copy < 16 bytes using XMM registers so do it manually. */ + if (n < 16) { + if (n & 0x01) { + *(uint8_t *)dst = *(const uint8_t *)src; + dst = (uint8_t *)dst + 1; + src = (const uint8_t *)src + 1; + } + if (n & 0x02) { + *(uint16_t *)dst = *(const uint16_t *)src; + dst = (uint16_t *)dst + 1; + src = (const uint16_t *)src + 1; + } + if (n & 0x04) { + *(uint32_t *)dst = *(const uint32_t *)src; + dst = (uint32_t *)dst + 1; + src = (const uint32_t *)src + 1; + } + if (n & 0x08) { + /* ARMv7 can not handle unaligned access to long long + * (uint64_t). Therefore two uint32_t operations are + * used. + */ + *(uint32_t *)dst = *(const uint32_t *)src; + dst = (uint32_t *)dst + 1; + src = (const uint32_t *)src + 1; + *(uint32_t *)dst = *(const uint32_t *)src; + } + return ret; + } + + /* Special fast cases for <= 128 bytes */ + if (n <= 32) { + rte_mov16((uint8_t *)dst, (const uint8_t *)src); + rte_mov16((uint8_t *)dst - 16 + n, + (const uint8_t *)src - 16 + n); + return ret; + } + + if (n <= 64) { + rte_mov32((uint8_t *)dst, (const uint8_t *)src); + rte_mov32((uint8_t *)dst - 32 + n, + (const uint8_t *)src - 32 + n); + return ret; + } + + if (n <= 128) { + rte_mov64((uint8_t *)dst, (const uint8_t *)src); + rte_mov64((uint8_t *)dst - 64 + n, + (const uint8_t *)src - 64 + n); + return ret; + } + + /* + * For large copies > 128 bytes. This combination of 256, 64 and 16 byte + * copies was found to be faster than doing 128 and 32 byte copies as + * well. + */ + for ( ; n >= 256; n -= 256) { + rte_mov256((uint8_t *)dst, (const uint8_t *)src); + dst = (uint8_t *)dst + 256; + src = (const uint8_t *)src + 256; + } + + /* + * We split the remaining bytes (which will be less than 256) into + * 64byte (2^6) chunks. + * Using incrementing integers in the case labels of a switch statement + * enourages the compiler to use a jump table. To get incrementing + * integers, we shift the 2 relevant bits to the LSB position to first + * get decrementing integers, and then subtract. + */ + switch (3 - (n >> 6)) { + case 0x00: + rte_mov64((uint8_t *)dst, (const uint8_t *)src); + n -= 64; + dst = (uint8_t *)dst + 64; + src = (const uint8_t *)src + 64; /* fallthrough */ + case 0x01: + rte_mov64((uint8_t *)dst, (const uint8_t *)src); + n -= 64; + dst = (uint8_t *)dst + 64; + src = (const uint8_t *)src + 64; /* fallthrough */ + case 0x02: + rte_mov64((uint8_t *)dst, (const uint8_t *)src); + n -= 64; + dst = (uint8_t *)dst + 64; + src = (const uint8_t *)src + 64; /* fallthrough */ + default: + break; + } + + /* + * We split the remaining bytes (which will be less than 64) into + * 16byte (2^4) chunks, using the same switch structure as above. + */ + switch (3 - (n >> 4)) { + case 0x00: + rte_mov16((uint8_t *)dst, (const uint8_t *)src); + n -= 16; + dst = (uint8_t *)dst + 16; + src = (const uint8_t *)src + 16; /* fallthrough */ + case 0x01: + rte_mov16((uint8_t *)dst, (const uint8_t *)src); + n -= 16; + dst = (uint8_t *)dst + 16; + src = (const uint8_t *)src + 16; /* fallthrough */ + case 0x02: + rte_mov16((uint8_t *)dst, (const uint8_t *)src); + n -= 16; + dst = (uint8_t *)dst + 16; + src = (const uint8_t *)src + 16; /* fallthrough */ + default: + break; + } + + /* Copy any remaining bytes, without going beyond end of buffers */ + if (n != 0) + rte_mov16((uint8_t *)dst - 16 + n, + (const uint8_t *)src - 16 + n); + return ret; +} + +#else + +static inline void +rte_mov16(uint8_t *dst, const uint8_t *src) +{ + memcpy(dst, src, 16); +} + +static inline void +rte_mov32(uint8_t *dst, const uint8_t *src) +{ + memcpy(dst, src, 32); +} + +static inline void +rte_mov48(uint8_t *dst, const uint8_t *src) +{ + memcpy(dst, src, 48); +} + +static inline void +rte_mov64(uint8_t *dst, const uint8_t *src) +{ + memcpy(dst, src, 64); +} + +static inline void +rte_mov128(uint8_t *dst, const uint8_t *src) +{ + memcpy(dst, src, 128); +} + +static inline void +rte_mov256(uint8_t *dst, const uint8_t *src) +{ + memcpy(dst, src, 256); +} + +static inline void * +rte_memcpy(void *dst, const void *src, size_t n) +{ + return memcpy(dst, src, n); +} + +static inline void * +rte_memcpy_func(void *dst, const void *src, size_t n) +{ + return memcpy(dst, src, n); +} + +#endif /* RTE_ARCH_ARM_NEON_MEMCPY */ + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_MEMCPY_ARM32_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_memcpy_64.h b/lib/librte_eal/common/include/arch/arm/rte_memcpy_64.h new file mode 100644 index 00000000..917cdc1b --- /dev/null +++ b/lib/librte_eal/common/include/arch/arm/rte_memcpy_64.h @@ -0,0 +1,93 @@ +/* + * BSD LICENSE + * + * Copyright (C) Cavium networks Ltd. 2015. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium networks nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _RTE_MEMCPY_ARM64_H_ +#define _RTE_MEMCPY_ARM64_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +#include "generic/rte_memcpy.h" + +static inline void +rte_mov16(uint8_t *dst, const uint8_t *src) +{ + memcpy(dst, src, 16); +} + +static inline void +rte_mov32(uint8_t *dst, const uint8_t *src) +{ + memcpy(dst, src, 32); +} + +static inline void +rte_mov48(uint8_t *dst, const uint8_t *src) +{ + memcpy(dst, src, 48); +} + +static inline void +rte_mov64(uint8_t *dst, const uint8_t *src) +{ + memcpy(dst, src, 64); +} + +static inline void +rte_mov128(uint8_t *dst, const uint8_t *src) +{ + memcpy(dst, src, 128); +} + +static inline void +rte_mov256(uint8_t *dst, const uint8_t *src) +{ + memcpy(dst, src, 256); +} + +#define rte_memcpy(d, s, n) memcpy((d), (s), (n)) + +static inline void * +rte_memcpy_func(void *dst, const void *src, size_t n) +{ + return memcpy(dst, src, n); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_MEMCPY_ARM_64_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_prefetch.h b/lib/librte_eal/common/include/arch/arm/rte_prefetch.h new file mode 100644 index 00000000..aa37de57 --- /dev/null +++ b/lib/librte_eal/common/include/arch/arm/rte_prefetch.h @@ -0,0 +1,42 @@ +/* + * BSD LICENSE + * + * Copyright(c) 2015 RehiveTech. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of RehiveTech nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_PREFETCH_ARM_H_ +#define _RTE_PREFETCH_ARM_H_ + +#ifdef RTE_ARCH_64 +#include +#else +#include +#endif + +#endif /* _RTE_PREFETCH_ARM_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h b/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h new file mode 100644 index 00000000..5aeed22d --- /dev/null +++ b/lib/librte_eal/common/include/arch/arm/rte_prefetch_32.h @@ -0,0 +1,67 @@ +/* + * BSD LICENSE + * + * Copyright(c) 2015 RehiveTech. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of RehiveTech nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_PREFETCH_ARM32_H_ +#define _RTE_PREFETCH_ARM32_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_prefetch.h" + +static inline void rte_prefetch0(const volatile void *p) +{ + asm volatile ("pld [%0]" : : "r" (p)); +} + +static inline void rte_prefetch1(const volatile void *p) +{ + asm volatile ("pld [%0]" : : "r" (p)); +} + +static inline void rte_prefetch2(const volatile void *p) +{ + asm volatile ("pld [%0]" : : "r" (p)); +} + +static inline void rte_prefetch_non_temporal(const volatile void *p) +{ + /* non-temporal version not available, fallback to rte_prefetch0 */ + rte_prefetch0(p); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_PREFETCH_ARM32_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h b/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h new file mode 100644 index 00000000..3ed46a46 --- /dev/null +++ b/lib/librte_eal/common/include/arch/arm/rte_prefetch_64.h @@ -0,0 +1,66 @@ +/* + * BSD LICENSE + * + * Copyright (C) Cavium networks Ltd. 2015. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium networks nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_PREFETCH_ARM_64_H_ +#define _RTE_PREFETCH_ARM_64_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_prefetch.h" + +static inline void rte_prefetch0(const volatile void *p) +{ + asm volatile ("PRFM PLDL1KEEP, [%0]" : : "r" (p)); +} + +static inline void rte_prefetch1(const volatile void *p) +{ + asm volatile ("PRFM PLDL2KEEP, [%0]" : : "r" (p)); +} + +static inline void rte_prefetch2(const volatile void *p) +{ + asm volatile ("PRFM PLDL3KEEP, [%0]" : : "r" (p)); +} + +static inline void rte_prefetch_non_temporal(const volatile void *p) +{ + asm volatile ("PRFM PLDL1STRM, [%0]" : : "r" (p)); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_PREFETCH_ARM_64_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_rwlock.h b/lib/librte_eal/common/include/arch/arm/rte_rwlock.h new file mode 100644 index 00000000..664bec88 --- /dev/null +++ b/lib/librte_eal/common/include/arch/arm/rte_rwlock.h @@ -0,0 +1,40 @@ +/* copied from ppc_64 */ + +#ifndef _RTE_RWLOCK_ARM_H_ +#define _RTE_RWLOCK_ARM_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_rwlock.h" + +static inline void +rte_rwlock_read_lock_tm(rte_rwlock_t *rwl) +{ + rte_rwlock_read_lock(rwl); +} + +static inline void +rte_rwlock_read_unlock_tm(rte_rwlock_t *rwl) +{ + rte_rwlock_read_unlock(rwl); +} + +static inline void +rte_rwlock_write_lock_tm(rte_rwlock_t *rwl) +{ + rte_rwlock_write_lock(rwl); +} + +static inline void +rte_rwlock_write_unlock_tm(rte_rwlock_t *rwl) +{ + rte_rwlock_write_unlock(rwl); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_RWLOCK_ARM_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_spinlock.h b/lib/librte_eal/common/include/arch/arm/rte_spinlock.h new file mode 100644 index 00000000..396a42e8 --- /dev/null +++ b/lib/librte_eal/common/include/arch/arm/rte_spinlock.h @@ -0,0 +1,92 @@ +/* + * BSD LICENSE + * + * Copyright(c) 2015 RehiveTech. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of RehiveTech nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_SPINLOCK_ARM_H_ +#define _RTE_SPINLOCK_ARM_H_ + +#ifndef RTE_FORCE_INTRINSICS +# error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include "generic/rte_spinlock.h" + +static inline int rte_tm_supported(void) +{ + return 0; +} + +static inline void +rte_spinlock_lock_tm(rte_spinlock_t *sl) +{ + rte_spinlock_lock(sl); /* fall-back */ +} + +static inline int +rte_spinlock_trylock_tm(rte_spinlock_t *sl) +{ + return rte_spinlock_trylock(sl); +} + +static inline void +rte_spinlock_unlock_tm(rte_spinlock_t *sl) +{ + rte_spinlock_unlock(sl); +} + +static inline void +rte_spinlock_recursive_lock_tm(rte_spinlock_recursive_t *slr) +{ + rte_spinlock_recursive_lock(slr); /* fall-back */ +} + +static inline void +rte_spinlock_recursive_unlock_tm(rte_spinlock_recursive_t *slr) +{ + rte_spinlock_recursive_unlock(slr); +} + +static inline int +rte_spinlock_recursive_trylock_tm(rte_spinlock_recursive_t *slr) +{ + return rte_spinlock_recursive_trylock(slr); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_SPINLOCK_ARM_H_ */ diff --git a/lib/librte_eal/common/include/arch/arm/rte_vect.h b/lib/librte_eal/common/include/arch/arm/rte_vect.h new file mode 100644 index 00000000..a33c0544 --- /dev/null +++ b/lib/librte_eal/common/include/arch/arm/rte_vect.h @@ -0,0 +1,83 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 Cavium Networks. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Cavium Networks nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_VECT_ARM_H_ +#define _RTE_VECT_ARM_H_ + +#include "arm_neon.h" + +#ifdef __cplusplus +extern "C" { +#endif + +typedef int32x4_t xmm_t; + +#define XMM_SIZE (sizeof(xmm_t)) +#define XMM_MASK (XMM_SIZE - 1) + +typedef union rte_xmm { + xmm_t x; + uint8_t u8[XMM_SIZE / sizeof(uint8_t)]; + uint16_t u16[XMM_SIZE / sizeof(uint16_t)]; + uint32_t u32[XMM_SIZE / sizeof(uint32_t)]; + uint64_t u64[XMM_SIZE / sizeof(uint64_t)]; + double pd[XMM_SIZE / sizeof(double)]; +} __attribute__((aligned(16))) rte_xmm_t; + +#ifdef RTE_ARCH_ARM +/* NEON intrinsic vqtbl1q_u8() is not supported in ARMv7-A(AArch32) */ +static __inline uint8x16_t +vqtbl1q_u8(uint8x16_t a, uint8x16_t b) +{ + uint8_t i, pos; + rte_xmm_t rte_a, rte_b, rte_ret; + + vst1q_u8(rte_a.u8, a); + vst1q_u8(rte_b.u8, b); + + for (i = 0; i < 16; i++) { + pos = rte_b.u8[i]; + if (pos < 16) + rte_ret.u8[i] = rte_a.u8[pos]; + else + rte_ret.u8[i] = 0; + } + + return vld1q_u8(rte_ret.u8); +} +#endif + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h b/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h new file mode 100644 index 00000000..feae4868 --- /dev/null +++ b/lib/librte_eal/common/include/arch/ppc_64/rte_atomic.h @@ -0,0 +1,432 @@ +/* + * BSD LICENSE + * + * Copyright (C) IBM Corporation 2014. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of IBM Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* + * Inspired from FreeBSD src/sys/powerpc/include/atomic.h + * Copyright (c) 2008 Marcel Moolenaar + * Copyright (c) 2001 Benno Rice + * Copyright (c) 2001 David E. O'Brien + * Copyright (c) 1998 Doug Rabson + * All rights reserved. + */ + +#ifndef _RTE_ATOMIC_PPC_64_H_ +#define _RTE_ATOMIC_PPC_64_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_atomic.h" + +/** + * General memory barrier. + * + * Guarantees that the LOAD and STORE operations generated before the + * barrier occur before the LOAD and STORE operations generated after. + */ +#define rte_mb() {asm volatile("sync" : : : "memory"); } + +/** + * Write memory barrier. + * + * Guarantees that the STORE operations generated before the barrier + * occur before the STORE operations generated after. + */ +#define rte_wmb() {asm volatile("sync" : : : "memory"); } + +/** + * Read memory barrier. + * + * Guarantees that the LOAD operations generated before the barrier + * occur before the LOAD operations generated after. + */ +#define rte_rmb() {asm volatile("sync" : : : "memory"); } + +#define rte_smp_mb() rte_mb() + +#define rte_smp_wmb() rte_compiler_barrier() + +#define rte_smp_rmb() rte_compiler_barrier() + +/*------------------------- 16 bit atomic operations -------------------------*/ +/* To be compatible with Power7, use GCC built-in functions for 16 bit + * operations */ + +#ifndef RTE_FORCE_INTRINSICS +static inline int +rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src) +{ + return __atomic_compare_exchange(dst, &exp, &src, 0, __ATOMIC_ACQUIRE, + __ATOMIC_ACQUIRE) ? 1 : 0; +} + +static inline int rte_atomic16_test_and_set(rte_atomic16_t *v) +{ + return rte_atomic16_cmpset((volatile uint16_t *)&v->cnt, 0, 1); +} + +static inline void +rte_atomic16_inc(rte_atomic16_t *v) +{ + __atomic_add_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE); +} + +static inline void +rte_atomic16_dec(rte_atomic16_t *v) +{ + __atomic_sub_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE); +} + +static inline int rte_atomic16_inc_and_test(rte_atomic16_t *v) +{ + return __atomic_add_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE) == 0; +} + +static inline int rte_atomic16_dec_and_test(rte_atomic16_t *v) +{ + return __atomic_sub_fetch(&v->cnt, 1, __ATOMIC_ACQUIRE) == 0; +} + +/*------------------------- 32 bit atomic operations -------------------------*/ + +static inline int +rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src) +{ + unsigned int ret = 0; + + asm volatile( + "\tlwsync\n" + "1:\tlwarx %[ret], 0, %[dst]\n" + "cmplw %[exp], %[ret]\n" + "bne 2f\n" + "stwcx. %[src], 0, %[dst]\n" + "bne- 1b\n" + "li %[ret], 1\n" + "b 3f\n" + "2:\n" + "stwcx. %[ret], 0, %[dst]\n" + "li %[ret], 0\n" + "3:\n" + "isync\n" + : [ret] "=&r" (ret), "=m" (*dst) + : [dst] "r" (dst), + [exp] "r" (exp), + [src] "r" (src), + "m" (*dst) + : "cc", "memory"); + + return ret; +} + +static inline int rte_atomic32_test_and_set(rte_atomic32_t *v) +{ + return rte_atomic32_cmpset((volatile uint32_t *)&v->cnt, 0, 1); +} + +static inline void +rte_atomic32_inc(rte_atomic32_t *v) +{ + int t; + + asm volatile( + "1: lwarx %[t],0,%[cnt]\n" + "addic %[t],%[t],1\n" + "stwcx. %[t],0,%[cnt]\n" + "bne- 1b\n" + : [t] "=&r" (t), "=m" (v->cnt) + : [cnt] "r" (&v->cnt), "m" (v->cnt) + : "cc", "xer", "memory"); +} + +static inline void +rte_atomic32_dec(rte_atomic32_t *v) +{ + int t; + + asm volatile( + "1: lwarx %[t],0,%[cnt]\n" + "addic %[t],%[t],-1\n" + "stwcx. %[t],0,%[cnt]\n" + "bne- 1b\n" + : [t] "=&r" (t), "=m" (v->cnt) + : [cnt] "r" (&v->cnt), "m" (v->cnt) + : "cc", "xer", "memory"); +} + +static inline int rte_atomic32_inc_and_test(rte_atomic32_t *v) +{ + int ret; + + asm volatile( + "\n\tlwsync\n" + "1: lwarx %[ret],0,%[cnt]\n" + "addic %[ret],%[ret],1\n" + "stwcx. %[ret],0,%[cnt]\n" + "bne- 1b\n" + "isync\n" + : [ret] "=&r" (ret) + : [cnt] "r" (&v->cnt) + : "cc", "xer", "memory"); + + return ret == 0; +} + +static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v) +{ + int ret; + + asm volatile( + "\n\tlwsync\n" + "1: lwarx %[ret],0,%[cnt]\n" + "addic %[ret],%[ret],-1\n" + "stwcx. %[ret],0,%[cnt]\n" + "bne- 1b\n" + "isync\n" + : [ret] "=&r" (ret) + : [cnt] "r" (&v->cnt) + : "cc", "xer", "memory"); + + return ret == 0; +} +/*------------------------- 64 bit atomic operations -------------------------*/ + +static inline int +rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src) +{ + unsigned int ret = 0; + + asm volatile ( + "\tlwsync\n" + "1: ldarx %[ret], 0, %[dst]\n" + "cmpld %[exp], %[ret]\n" + "bne 2f\n" + "stdcx. %[src], 0, %[dst]\n" + "bne- 1b\n" + "li %[ret], 1\n" + "b 3f\n" + "2:\n" + "stdcx. %[ret], 0, %[dst]\n" + "li %[ret], 0\n" + "3:\n" + "isync\n" + : [ret] "=&r" (ret), "=m" (*dst) + : [dst] "r" (dst), + [exp] "r" (exp), + [src] "r" (src), + "m" (*dst) + : "cc", "memory"); + return ret; +} + +static inline void +rte_atomic64_init(rte_atomic64_t *v) +{ + v->cnt = 0; +} + +static inline int64_t +rte_atomic64_read(rte_atomic64_t *v) +{ + long ret; + + asm volatile("ld%U1%X1 %[ret],%[cnt]" + : [ret] "=r"(ret) + : [cnt] "m"(v->cnt)); + + return ret; +} + +static inline void +rte_atomic64_set(rte_atomic64_t *v, int64_t new_value) +{ + asm volatile("std%U0%X0 %[new_value],%[cnt]" + : [cnt] "=m"(v->cnt) + : [new_value] "r"(new_value)); +} + +static inline void +rte_atomic64_add(rte_atomic64_t *v, int64_t inc) +{ + long t; + + asm volatile( + "1: ldarx %[t],0,%[cnt]\n" + "add %[t],%[inc],%[t]\n" + "stdcx. %[t],0,%[cnt]\n" + "bne- 1b\n" + : [t] "=&r" (t), "=m" (v->cnt) + : [cnt] "r" (&v->cnt), [inc] "r" (inc), "m" (v->cnt) + : "cc", "memory"); +} + +static inline void +rte_atomic64_sub(rte_atomic64_t *v, int64_t dec) +{ + long t; + + asm volatile( + "1: ldarx %[t],0,%[cnt]\n" + "subf %[t],%[dec],%[t]\n" + "stdcx. %[t],0,%[cnt]\n" + "bne- 1b\n" + : [t] "=&r" (t), "+m" (v->cnt) + : [cnt] "r" (&v->cnt), [dec] "r" (dec), "m" (v->cnt) + : "cc", "memory"); +} + +static inline void +rte_atomic64_inc(rte_atomic64_t *v) +{ + long t; + + asm volatile( + "1: ldarx %[t],0,%[cnt]\n" + "addic %[t],%[t],1\n" + "stdcx. %[t],0,%[cnt]\n" + "bne- 1b\n" + : [t] "=&r" (t), "+m" (v->cnt) + : [cnt] "r" (&v->cnt), "m" (v->cnt) + : "cc", "xer", "memory"); +} + +static inline void +rte_atomic64_dec(rte_atomic64_t *v) +{ + long t; + + asm volatile( + "1: ldarx %[t],0,%[cnt]\n" + "addic %[t],%[t],-1\n" + "stdcx. %[t],0,%[cnt]\n" + "bne- 1b\n" + : [t] "=&r" (t), "+m" (v->cnt) + : [cnt] "r" (&v->cnt), "m" (v->cnt) + : "cc", "xer", "memory"); +} + +static inline int64_t +rte_atomic64_add_return(rte_atomic64_t *v, int64_t inc) +{ + long ret; + + asm volatile( + "\n\tlwsync\n" + "1: ldarx %[ret],0,%[cnt]\n" + "add %[ret],%[inc],%[ret]\n" + "stdcx. %[ret],0,%[cnt]\n" + "bne- 1b\n" + "isync\n" + : [ret] "=&r" (ret) + : [inc] "r" (inc), [cnt] "r" (&v->cnt) + : "cc", "memory"); + + return ret; +} + +static inline int64_t +rte_atomic64_sub_return(rte_atomic64_t *v, int64_t dec) +{ + long ret; + + asm volatile( + "\n\tlwsync\n" + "1: ldarx %[ret],0,%[cnt]\n" + "subf %[ret],%[dec],%[ret]\n" + "stdcx. %[ret],0,%[cnt]\n" + "bne- 1b\n" + "isync\n" + : [ret] "=&r" (ret) + : [dec] "r" (dec), [cnt] "r" (&v->cnt) + : "cc", "memory"); + + return ret; +} + +static inline int rte_atomic64_inc_and_test(rte_atomic64_t *v) +{ + long ret; + + asm volatile( + "\n\tlwsync\n" + "1: ldarx %[ret],0,%[cnt]\n" + "addic %[ret],%[ret],1\n" + "stdcx. %[ret],0,%[cnt]\n" + "bne- 1b\n" + "isync\n" + : [ret] "=&r" (ret) + : [cnt] "r" (&v->cnt) + : "cc", "xer", "memory"); + + return ret == 0; +} + +static inline int rte_atomic64_dec_and_test(rte_atomic64_t *v) +{ + long ret; + + asm volatile( + "\n\tlwsync\n" + "1: ldarx %[ret],0,%[cnt]\n" + "addic %[ret],%[ret],-1\n" + "stdcx. %[ret],0,%[cnt]\n" + "bne- 1b\n" + "isync\n" + : [ret] "=&r" (ret) + : [cnt] "r" (&v->cnt) + : "cc", "xer", "memory"); + + return ret == 0; +} + +static inline int rte_atomic64_test_and_set(rte_atomic64_t *v) +{ + return rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, 0, 1); +} + +/** + * Atomically set a 64-bit counter to 0. + * + * @param v + * A pointer to the atomic counter. + */ +static inline void rte_atomic64_clear(rte_atomic64_t *v) +{ + v->cnt = 0; +} +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_ATOMIC_PPC_64_H_ */ diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_byteorder.h b/lib/librte_eal/common/include/arch/ppc_64/rte_byteorder.h new file mode 100644 index 00000000..3c1734ed --- /dev/null +++ b/lib/librte_eal/common/include/arch/ppc_64/rte_byteorder.h @@ -0,0 +1,149 @@ +/* + * BSD LICENSE + * + * Copyright (C) IBM Corporation 2014. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of IBM Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +/* Inspired from FreeBSD src/sys/powerpc/include/endian.h + * Copyright (c) 1987, 1991, 1993 + * The Regents of the University of California. All rights reserved. +*/ + +#ifndef _RTE_BYTEORDER_PPC_64_H_ +#define _RTE_BYTEORDER_PPC_64_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_byteorder.h" + +/* + * An architecture-optimized byte swap for a 16-bit value. + * + * Do not use this function directly. The preferred function is rte_bswap16(). + */ +static inline uint16_t rte_arch_bswap16(uint16_t _x) +{ + return (_x >> 8) | ((_x << 8) & 0xff00); +} + +/* + * An architecture-optimized byte swap for a 32-bit value. + * + * Do not use this function directly. The preferred function is rte_bswap32(). + */ +static inline uint32_t rte_arch_bswap32(uint32_t _x) +{ + return (_x >> 24) | ((_x >> 8) & 0xff00) | ((_x << 8) & 0xff0000) | + ((_x << 24) & 0xff000000); +} + +/* + * An architecture-optimized byte swap for a 64-bit value. + * + * Do not use this function directly. The preferred function is rte_bswap64(). + */ +/* 64-bit mode */ +static inline uint64_t rte_arch_bswap64(uint64_t _x) +{ + return (_x >> 56) | ((_x >> 40) & 0xff00) | ((_x >> 24) & 0xff0000) | + ((_x >> 8) & 0xff000000) | ((_x << 8) & (0xffULL << 32)) | + ((_x << 24) & (0xffULL << 40)) | + ((_x << 40) & (0xffULL << 48)) | ((_x << 56)); +} + +#ifndef RTE_FORCE_INTRINSICS +#define rte_bswap16(x) ((uint16_t)(__builtin_constant_p(x) ? \ + rte_constant_bswap16(x) : \ + rte_arch_bswap16(x))) + +#define rte_bswap32(x) ((uint32_t)(__builtin_constant_p(x) ? \ + rte_constant_bswap32(x) : \ + rte_arch_bswap32(x))) + +#define rte_bswap64(x) ((uint64_t)(__builtin_constant_p(x) ? \ + rte_constant_bswap64(x) : \ + rte_arch_bswap64(x))) +#else +/* + * __builtin_bswap16 is only available gcc 4.8 and upwards + */ +#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 8) +#define rte_bswap16(x) ((uint16_t)(__builtin_constant_p(x) ? \ + rte_constant_bswap16(x) : \ + rte_arch_bswap16(x))) +#endif +#endif + +/* Power 8 have both little endian and big endian mode + * Power 7 only support big endian + */ +#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN + +#define rte_cpu_to_le_16(x) (x) +#define rte_cpu_to_le_32(x) (x) +#define rte_cpu_to_le_64(x) (x) + +#define rte_cpu_to_be_16(x) rte_bswap16(x) +#define rte_cpu_to_be_32(x) rte_bswap32(x) +#define rte_cpu_to_be_64(x) rte_bswap64(x) + +#define rte_le_to_cpu_16(x) (x) +#define rte_le_to_cpu_32(x) (x) +#define rte_le_to_cpu_64(x) (x) + +#define rte_be_to_cpu_16(x) rte_bswap16(x) +#define rte_be_to_cpu_32(x) rte_bswap32(x) +#define rte_be_to_cpu_64(x) rte_bswap64(x) + +#else /* RTE_BIG_ENDIAN */ + +#define rte_cpu_to_le_16(x) rte_bswap16(x) +#define rte_cpu_to_le_32(x) rte_bswap32(x) +#define rte_cpu_to_le_64(x) rte_bswap64(x) + +#define rte_cpu_to_be_16(x) (x) +#define rte_cpu_to_be_32(x) (x) +#define rte_cpu_to_be_64(x) (x) + +#define rte_le_to_cpu_16(x) rte_bswap16(x) +#define rte_le_to_cpu_32(x) rte_bswap32(x) +#define rte_le_to_cpu_64(x) rte_bswap64(x) + +#define rte_be_to_cpu_16(x) (x) +#define rte_be_to_cpu_32(x) (x) +#define rte_be_to_cpu_64(x) (x) +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_BYTEORDER_PPC_64_H_ */ diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_cpuflags.h b/lib/librte_eal/common/include/arch/ppc_64/rte_cpuflags.h new file mode 100644 index 00000000..7cc2b3c5 --- /dev/null +++ b/lib/librte_eal/common/include/arch/ppc_64/rte_cpuflags.h @@ -0,0 +1,88 @@ +/* + * BSD LICENSE + * + * Copyright (C) IBM Corporation 2014. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of IBM Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _RTE_CPUFLAGS_PPC_64_H_ +#define _RTE_CPUFLAGS_PPC_64_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Enumeration of all CPU features supported + */ +enum rte_cpu_flag_t { + RTE_CPUFLAG_PPC_LE = 0, + RTE_CPUFLAG_TRUE_LE, + RTE_CPUFLAG_PSERIES_PERFMON_COMPAT, + RTE_CPUFLAG_VSX, + RTE_CPUFLAG_ARCH_2_06, + RTE_CPUFLAG_POWER6_EXT, + RTE_CPUFLAG_DFP, + RTE_CPUFLAG_PA6T, + RTE_CPUFLAG_ARCH_2_05, + RTE_CPUFLAG_ICACHE_SNOOP, + RTE_CPUFLAG_SMT, + RTE_CPUFLAG_BOOKE, + RTE_CPUFLAG_CELLBE, + RTE_CPUFLAG_POWER5_PLUS, + RTE_CPUFLAG_POWER5, + RTE_CPUFLAG_POWER4, + RTE_CPUFLAG_NOTB, + RTE_CPUFLAG_EFP_DOUBLE, + RTE_CPUFLAG_EFP_SINGLE, + RTE_CPUFLAG_SPE, + RTE_CPUFLAG_UNIFIED_CACHE, + RTE_CPUFLAG_4xxMAC, + RTE_CPUFLAG_MMU, + RTE_CPUFLAG_FPU, + RTE_CPUFLAG_ALTIVEC, + RTE_CPUFLAG_PPC601, + RTE_CPUFLAG_PPC64, + RTE_CPUFLAG_PPC32, + RTE_CPUFLAG_TAR, + RTE_CPUFLAG_LSEL, + RTE_CPUFLAG_EBB, + RTE_CPUFLAG_DSCR, + RTE_CPUFLAG_HTM, + RTE_CPUFLAG_ARCH_2_07, + /* The last item */ + RTE_CPUFLAG_NUMFLAGS,/**< This should always be the last! */ +}; + +#include "generic/rte_cpuflags.h" + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_CPUFLAGS_PPC_64_H_ */ diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h b/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h new file mode 100644 index 00000000..64beddf9 --- /dev/null +++ b/lib/librte_eal/common/include/arch/ppc_64/rte_cycles.h @@ -0,0 +1,94 @@ +/* + * BSD LICENSE + * + * Copyright (C) IBM Corporation 2014. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of IBM Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _RTE_CYCLES_PPC_64_H_ +#define _RTE_CYCLES_PPC_64_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_cycles.h" + +#include + +/** + * Read the time base register. + * + * @return + * The time base for this lcore. + */ +static inline uint64_t +rte_rdtsc(void) +{ + union { + uint64_t tsc_64; + struct { +#if RTE_BYTE_ORDER == RTE_BIG_ENDIAN + uint32_t hi_32; + uint32_t lo_32; +#else + uint32_t lo_32; + uint32_t hi_32; +#endif + }; + } tsc; + uint32_t tmp; + + asm volatile( + "0:\n" + "mftbu %[hi32]\n" + "mftb %[lo32]\n" + "mftbu %[tmp]\n" + "cmpw %[tmp],%[hi32]\n" + "bne 0b\n" + : [hi32] "=r"(tsc.hi_32), [lo32] "=r"(tsc.lo_32), + [tmp] "=r"(tmp) + ); + return tsc.tsc_64; +} + +static inline uint64_t +rte_rdtsc_precise(void) +{ + rte_mb(); + return rte_rdtsc(); +} + +static inline uint64_t +rte_get_tsc_cycles(void) { return rte_rdtsc(); } + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_CYCLES_PPC_64_H_ */ diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h b/lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h new file mode 100644 index 00000000..acf7aac2 --- /dev/null +++ b/lib/librte_eal/common/include/arch/ppc_64/rte_memcpy.h @@ -0,0 +1,225 @@ +/* + * BSD LICENSE + * + * Copyright (C) IBM Corporation 2014. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of IBM Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _RTE_MEMCPY_PPC_64_H_ +#define _RTE_MEMCPY_PPC_64_H_ + +#include +#include +/*To include altivec.h, GCC version must >= 4.8 */ +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_memcpy.h" + +static inline void +rte_mov16(uint8_t *dst, const uint8_t *src) +{ + vec_vsx_st(vec_vsx_ld(0, src), 0, dst); +} + +static inline void +rte_mov32(uint8_t *dst, const uint8_t *src) +{ + vec_vsx_st(vec_vsx_ld(0, src), 0, dst); + vec_vsx_st(vec_vsx_ld(16, src), 16, dst); +} + +static inline void +rte_mov48(uint8_t *dst, const uint8_t *src) +{ + vec_vsx_st(vec_vsx_ld(0, src), 0, dst); + vec_vsx_st(vec_vsx_ld(16, src), 16, dst); + vec_vsx_st(vec_vsx_ld(32, src), 32, dst); +} + +static inline void +rte_mov64(uint8_t *dst, const uint8_t *src) +{ + vec_vsx_st(vec_vsx_ld(0, src), 0, dst); + vec_vsx_st(vec_vsx_ld(16, src), 16, dst); + vec_vsx_st(vec_vsx_ld(32, src), 32, dst); + vec_vsx_st(vec_vsx_ld(48, src), 48, dst); +} + +static inline void +rte_mov128(uint8_t *dst, const uint8_t *src) +{ + vec_vsx_st(vec_vsx_ld(0, src), 0, dst); + vec_vsx_st(vec_vsx_ld(16, src), 16, dst); + vec_vsx_st(vec_vsx_ld(32, src), 32, dst); + vec_vsx_st(vec_vsx_ld(48, src), 48, dst); + vec_vsx_st(vec_vsx_ld(64, src), 64, dst); + vec_vsx_st(vec_vsx_ld(80, src), 80, dst); + vec_vsx_st(vec_vsx_ld(96, src), 96, dst); + vec_vsx_st(vec_vsx_ld(112, src), 112, dst); +} + +static inline void +rte_mov256(uint8_t *dst, const uint8_t *src) +{ + rte_mov128(dst, src); + rte_mov128(dst + 128, src + 128); +} + +#define rte_memcpy(dst, src, n) \ + ({ (__builtin_constant_p(n)) ? \ + memcpy((dst), (src), (n)) : \ + rte_memcpy_func((dst), (src), (n)); }) + +static inline void * +rte_memcpy_func(void *dst, const void *src, size_t n) +{ + void *ret = dst; + + /* We can't copy < 16 bytes using XMM registers so do it manually. */ + if (n < 16) { + if (n & 0x01) { + *(uint8_t *)dst = *(const uint8_t *)src; + dst = (uint8_t *)dst + 1; + src = (const uint8_t *)src + 1; + } + if (n & 0x02) { + *(uint16_t *)dst = *(const uint16_t *)src; + dst = (uint16_t *)dst + 1; + src = (const uint16_t *)src + 1; + } + if (n & 0x04) { + *(uint32_t *)dst = *(const uint32_t *)src; + dst = (uint32_t *)dst + 1; + src = (const uint32_t *)src + 1; + } + if (n & 0x08) + *(uint64_t *)dst = *(const uint64_t *)src; + return ret; + } + + /* Special fast cases for <= 128 bytes */ + if (n <= 32) { + rte_mov16((uint8_t *)dst, (const uint8_t *)src); + rte_mov16((uint8_t *)dst - 16 + n, + (const uint8_t *)src - 16 + n); + return ret; + } + + if (n <= 64) { + rte_mov32((uint8_t *)dst, (const uint8_t *)src); + rte_mov32((uint8_t *)dst - 32 + n, + (const uint8_t *)src - 32 + n); + return ret; + } + + if (n <= 128) { + rte_mov64((uint8_t *)dst, (const uint8_t *)src); + rte_mov64((uint8_t *)dst - 64 + n, + (const uint8_t *)src - 64 + n); + return ret; + } + + /* + * For large copies > 128 bytes. This combination of 256, 64 and 16 byte + * copies was found to be faster than doing 128 and 32 byte copies as + * well. + */ + for ( ; n >= 256; n -= 256) { + rte_mov256((uint8_t *)dst, (const uint8_t *)src); + dst = (uint8_t *)dst + 256; + src = (const uint8_t *)src + 256; + } + + /* + * We split the remaining bytes (which will be less than 256) into + * 64byte (2^6) chunks. + * Using incrementing integers in the case labels of a switch statement + * enourages the compiler to use a jump table. To get incrementing + * integers, we shift the 2 relevant bits to the LSB position to first + * get decrementing integers, and then subtract. + */ + switch (3 - (n >> 6)) { + case 0x00: + rte_mov64((uint8_t *)dst, (const uint8_t *)src); + n -= 64; + dst = (uint8_t *)dst + 64; + src = (const uint8_t *)src + 64; /* fallthrough */ + case 0x01: + rte_mov64((uint8_t *)dst, (const uint8_t *)src); + n -= 64; + dst = (uint8_t *)dst + 64; + src = (const uint8_t *)src + 64; /* fallthrough */ + case 0x02: + rte_mov64((uint8_t *)dst, (const uint8_t *)src); + n -= 64; + dst = (uint8_t *)dst + 64; + src = (const uint8_t *)src + 64; /* fallthrough */ + default: + ; + } + + /* + * We split the remaining bytes (which will be less than 64) into + * 16byte (2^4) chunks, using the same switch structure as above. + */ + switch (3 - (n >> 4)) { + case 0x00: + rte_mov16((uint8_t *)dst, (const uint8_t *)src); + n -= 16; + dst = (uint8_t *)dst + 16; + src = (const uint8_t *)src + 16; /* fallthrough */ + case 0x01: + rte_mov16((uint8_t *)dst, (const uint8_t *)src); + n -= 16; + dst = (uint8_t *)dst + 16; + src = (const uint8_t *)src + 16; /* fallthrough */ + case 0x02: + rte_mov16((uint8_t *)dst, (const uint8_t *)src); + n -= 16; + dst = (uint8_t *)dst + 16; + src = (const uint8_t *)src + 16; /* fallthrough */ + default: + ; + } + + /* Copy any remaining bytes, without going beyond end of buffers */ + if (n != 0) + rte_mov16((uint8_t *)dst - 16 + n, + (const uint8_t *)src - 16 + n); + return ret; +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_MEMCPY_PPC_64_H_ */ diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h b/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h new file mode 100644 index 00000000..9a1995ea --- /dev/null +++ b/lib/librte_eal/common/include/arch/ppc_64/rte_prefetch.h @@ -0,0 +1,67 @@ +/* + * BSD LICENSE + * + * Copyright (C) IBM Corporation 2014. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of IBM Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _RTE_PREFETCH_PPC_64_H_ +#define _RTE_PREFETCH_PPC_64_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_prefetch.h" + +static inline void rte_prefetch0(const volatile void *p) +{ + asm volatile ("dcbt 0,%[p],0" : : [p] "r" (p)); +} + +static inline void rte_prefetch1(const volatile void *p) +{ + asm volatile ("dcbt 0,%[p],0" : : [p] "r" (p)); +} + +static inline void rte_prefetch2(const volatile void *p) +{ + asm volatile ("dcbt 0,%[p],0" : : [p] "r" (p)); +} + +static inline void rte_prefetch_non_temporal(const volatile void *p) +{ + /* non-temporal version not available, fallback to rte_prefetch0 */ + rte_prefetch0(p); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_PREFETCH_PPC_64_H_ */ diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_rwlock.h b/lib/librte_eal/common/include/arch/ppc_64/rte_rwlock.h new file mode 100644 index 00000000..de8af19e --- /dev/null +++ b/lib/librte_eal/common/include/arch/ppc_64/rte_rwlock.h @@ -0,0 +1,38 @@ +#ifndef _RTE_RWLOCK_PPC_64_H_ +#define _RTE_RWLOCK_PPC_64_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_rwlock.h" + +static inline void +rte_rwlock_read_lock_tm(rte_rwlock_t *rwl) +{ + rte_rwlock_read_lock(rwl); +} + +static inline void +rte_rwlock_read_unlock_tm(rte_rwlock_t *rwl) +{ + rte_rwlock_read_unlock(rwl); +} + +static inline void +rte_rwlock_write_lock_tm(rte_rwlock_t *rwl) +{ + rte_rwlock_write_lock(rwl); +} + +static inline void +rte_rwlock_write_unlock_tm(rte_rwlock_t *rwl) +{ + rte_rwlock_write_unlock(rwl); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_RWLOCK_PPC_64_H_ */ diff --git a/lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h b/lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h new file mode 100644 index 00000000..af139c9d --- /dev/null +++ b/lib/librte_eal/common/include/arch/ppc_64/rte_spinlock.h @@ -0,0 +1,114 @@ +/* + * BSD LICENSE + * + * Copyright (C) IBM Corporation 2014. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of IBM Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _RTE_SPINLOCK_PPC_64_H_ +#define _RTE_SPINLOCK_PPC_64_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include "generic/rte_spinlock.h" + +/* Fixme: Use intrinsics to implement the spinlock on Power architecture */ + +#ifndef RTE_FORCE_INTRINSICS + +static inline void +rte_spinlock_lock(rte_spinlock_t *sl) +{ + while (__sync_lock_test_and_set(&sl->locked, 1)) + while (sl->locked) + rte_pause(); +} + +static inline void +rte_spinlock_unlock(rte_spinlock_t *sl) +{ + __sync_lock_release(&sl->locked); +} + +static inline int +rte_spinlock_trylock(rte_spinlock_t *sl) +{ + return __sync_lock_test_and_set(&sl->locked, 1) == 0; +} + +#endif + +static inline int rte_tm_supported(void) +{ + return 0; +} + +static inline void +rte_spinlock_lock_tm(rte_spinlock_t *sl) +{ + rte_spinlock_lock(sl); /* fall-back */ +} + +static inline int +rte_spinlock_trylock_tm(rte_spinlock_t *sl) +{ + return rte_spinlock_trylock(sl); +} + +static inline void +rte_spinlock_unlock_tm(rte_spinlock_t *sl) +{ + rte_spinlock_unlock(sl); +} + +static inline void +rte_spinlock_recursive_lock_tm(rte_spinlock_recursive_t *slr) +{ + rte_spinlock_recursive_lock(slr); /* fall-back */ +} + +static inline void +rte_spinlock_recursive_unlock_tm(rte_spinlock_recursive_t *slr) +{ + rte_spinlock_recursive_unlock(slr); +} + +static inline int +rte_spinlock_recursive_trylock_tm(rte_spinlock_recursive_t *slr) +{ + return rte_spinlock_recursive_trylock(slr); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_SPINLOCK_PPC_64_H_ */ diff --git a/lib/librte_eal/common/include/arch/tile/rte_atomic.h b/lib/librte_eal/common/include/arch/tile/rte_atomic.h new file mode 100644 index 00000000..28825ff6 --- /dev/null +++ b/lib/librte_eal/common/include/arch/tile/rte_atomic.h @@ -0,0 +1,92 @@ +/* + * BSD LICENSE + * + * Copyright (C) EZchip Semiconductor Ltd. 2015. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of EZchip Semiconductor nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _RTE_ATOMIC_TILE_H_ +#define _RTE_ATOMIC_TILE_H_ + +#ifndef RTE_FORCE_INTRINSICS +# error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_atomic.h" + +/** + * General memory barrier. + * + * Guarantees that the LOAD and STORE operations generated before the + * barrier occur before the LOAD and STORE operations generated after. + * This function is architecture dependent. + */ +static inline void rte_mb(void) +{ + __sync_synchronize(); +} + +/** + * Write memory barrier. + * + * Guarantees that the STORE operations generated before the barrier + * occur before the STORE operations generated after. + * This function is architecture dependent. + */ +static inline void rte_wmb(void) +{ + __sync_synchronize(); +} + +/** + * Read memory barrier. + * + * Guarantees that the LOAD operations generated before the barrier + * occur before the LOAD operations generated after. + * This function is architecture dependent. + */ +static inline void rte_rmb(void) +{ + __sync_synchronize(); +} + +#define rte_smp_mb() rte_mb() + +#define rte_smp_wmb() rte_compiler_barrier() + +#define rte_smp_rmb() rte_compiler_barrier() + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_ATOMIC_TILE_H_ */ diff --git a/lib/librte_eal/common/include/arch/tile/rte_byteorder.h b/lib/librte_eal/common/include/arch/tile/rte_byteorder.h new file mode 100644 index 00000000..7239e437 --- /dev/null +++ b/lib/librte_eal/common/include/arch/tile/rte_byteorder.h @@ -0,0 +1,91 @@ +/* + * BSD LICENSE + * + * Copyright (C) EZchip Semiconductor Ltd. 2015. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of EZchip Semiconductor nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _RTE_BYTEORDER_TILE_H_ +#define _RTE_BYTEORDER_TILE_H_ + +#ifndef RTE_FORCE_INTRINSICS +# error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_byteorder.h" + +#if !(__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8)) +#define rte_bswap16(x) rte_constant_bswap16(x) +#endif + +#if RTE_BYTE_ORDER == RTE_LITTLE_ENDIAN + +#define rte_cpu_to_le_16(x) (x) +#define rte_cpu_to_le_32(x) (x) +#define rte_cpu_to_le_64(x) (x) + +#define rte_cpu_to_be_16(x) rte_bswap16(x) +#define rte_cpu_to_be_32(x) rte_bswap32(x) +#define rte_cpu_to_be_64(x) rte_bswap64(x) + +#define rte_le_to_cpu_16(x) (x) +#define rte_le_to_cpu_32(x) (x) +#define rte_le_to_cpu_64(x) (x) + +#define rte_be_to_cpu_16(x) rte_bswap16(x) +#define rte_be_to_cpu_32(x) rte_bswap32(x) +#define rte_be_to_cpu_64(x) rte_bswap64(x) + +#else /* RTE_BIG_ENDIAN */ + +#define rte_cpu_to_le_16(x) rte_bswap16(x) +#define rte_cpu_to_le_32(x) rte_bswap32(x) +#define rte_cpu_to_le_64(x) rte_bswap64(x) + +#define rte_cpu_to_be_16(x) (x) +#define rte_cpu_to_be_32(x) (x) +#define rte_cpu_to_be_64(x) (x) + +#define rte_le_to_cpu_16(x) rte_bswap16(x) +#define rte_le_to_cpu_32(x) rte_bswap32(x) +#define rte_le_to_cpu_64(x) rte_bswap64(x) + +#define rte_be_to_cpu_16(x) (x) +#define rte_be_to_cpu_32(x) (x) +#define rte_be_to_cpu_64(x) (x) +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_BYTEORDER_TILE_H_ */ diff --git a/lib/librte_eal/common/include/arch/tile/rte_cpuflags.h b/lib/librte_eal/common/include/arch/tile/rte_cpuflags.h new file mode 100644 index 00000000..1849b520 --- /dev/null +++ b/lib/librte_eal/common/include/arch/tile/rte_cpuflags.h @@ -0,0 +1,53 @@ +/* + * BSD LICENSE + * + * Copyright (C) EZchip Semiconductor Ltd. 2015. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of EZchip Semiconductor nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _RTE_CPUFLAGS_TILE_H_ +#define _RTE_CPUFLAGS_TILE_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Enumeration of all CPU features supported + */ +enum rte_cpu_flag_t { + RTE_CPUFLAG_NUMFLAGS /**< This should always be the last! */ +}; + +#include "generic/rte_cpuflags.h" + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_CPUFLAGS_TILE_H_ */ diff --git a/lib/librte_eal/common/include/arch/tile/rte_cycles.h b/lib/librte_eal/common/include/arch/tile/rte_cycles.h new file mode 100644 index 00000000..0b2200a3 --- /dev/null +++ b/lib/librte_eal/common/include/arch/tile/rte_cycles.h @@ -0,0 +1,70 @@ +/* + * BSD LICENSE + * + * Copyright (C) EZchip Semiconductor Ltd. 2015. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of EZchip Semiconductor nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _RTE_CYCLES_TILE_H_ +#define _RTE_CYCLES_TILE_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +#include "generic/rte_cycles.h" + +/** + * Read the time base register. + * + * @return + * The time base for this lcore. + */ +static inline uint64_t +rte_rdtsc(void) +{ + return get_cycle_count(); +} + +static inline uint64_t +rte_rdtsc_precise(void) +{ + rte_mb(); + return rte_rdtsc(); +} + +static inline uint64_t +rte_get_tsc_cycles(void) { return rte_rdtsc(); } + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_CYCLES_TILE_H_ */ diff --git a/lib/librte_eal/common/include/arch/tile/rte_memcpy.h b/lib/librte_eal/common/include/arch/tile/rte_memcpy.h new file mode 100644 index 00000000..9b5b37ef --- /dev/null +++ b/lib/librte_eal/common/include/arch/tile/rte_memcpy.h @@ -0,0 +1,93 @@ +/* + * BSD LICENSE + * + * Copyright (C) EZchip Semiconductor Ltd. 2015. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of EZchip Semiconductor nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _RTE_MEMCPY_TILE_H_ +#define _RTE_MEMCPY_TILE_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +#include "generic/rte_memcpy.h" + +static inline void +rte_mov16(uint8_t *dst, const uint8_t *src) +{ + memcpy(dst, src, 16); +} + +static inline void +rte_mov32(uint8_t *dst, const uint8_t *src) +{ + memcpy(dst, src, 32); +} + +static inline void +rte_mov48(uint8_t *dst, const uint8_t *src) +{ + memcpy(dst, src, 48); +} + +static inline void +rte_mov64(uint8_t *dst, const uint8_t *src) +{ + memcpy(dst, src, 64); +} + +static inline void +rte_mov128(uint8_t *dst, const uint8_t *src) +{ + memcpy(dst, src, 128); +} + +static inline void +rte_mov256(uint8_t *dst, const uint8_t *src) +{ + memcpy(dst, src, 256); +} + +#define rte_memcpy(d, s, n) memcpy((d), (s), (n)) + +static inline void * +rte_memcpy_func(void *dst, const void *src, size_t n) +{ + return memcpy(dst, src, n); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_MEMCPY_TILE_H_ */ diff --git a/lib/librte_eal/common/include/arch/tile/rte_prefetch.h b/lib/librte_eal/common/include/arch/tile/rte_prefetch.h new file mode 100644 index 00000000..7a1bb93e --- /dev/null +++ b/lib/librte_eal/common/include/arch/tile/rte_prefetch.h @@ -0,0 +1,67 @@ +/* + * BSD LICENSE + * + * Copyright (C) EZchip Semiconductor Ltd. 2015. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of EZchip Semiconductor nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _RTE_PREFETCH_TILE_H_ +#define _RTE_PREFETCH_TILE_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_prefetch.h" + +static inline void rte_prefetch0(const volatile void *p) +{ + __builtin_prefetch((const void *)(uintptr_t)p, 0, 3); +} + +static inline void rte_prefetch1(const volatile void *p) +{ + __builtin_prefetch((const void *)(uintptr_t)p, 0, 2); +} + +static inline void rte_prefetch2(const volatile void *p) +{ + __builtin_prefetch((const void *)(uintptr_t)p, 0, 1); +} + +static inline void rte_prefetch_non_temporal(const volatile void *p) +{ + /* non-temporal version not available, fallback to rte_prefetch0 */ + rte_prefetch0(p); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_PREFETCH_TILE_H_ */ diff --git a/lib/librte_eal/common/include/arch/tile/rte_rwlock.h b/lib/librte_eal/common/include/arch/tile/rte_rwlock.h new file mode 100644 index 00000000..8f67a190 --- /dev/null +++ b/lib/librte_eal/common/include/arch/tile/rte_rwlock.h @@ -0,0 +1,70 @@ +/* + * BSD LICENSE + * + * Copyright (C) EZchip Semiconductor Ltd. 2015. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of EZchip Semiconductor nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _RTE_RWLOCK_TILE_H_ +#define _RTE_RWLOCK_TILE_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_rwlock.h" + +static inline void +rte_rwlock_read_lock_tm(rte_rwlock_t *rwl) +{ + rte_rwlock_read_lock(rwl); +} + +static inline void +rte_rwlock_read_unlock_tm(rte_rwlock_t *rwl) +{ + rte_rwlock_read_unlock(rwl); +} + +static inline void +rte_rwlock_write_lock_tm(rte_rwlock_t *rwl) +{ + rte_rwlock_write_lock(rwl); +} + +static inline void +rte_rwlock_write_unlock_tm(rte_rwlock_t *rwl) +{ + rte_rwlock_write_unlock(rwl); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_RWLOCK_TILE_H_ */ diff --git a/lib/librte_eal/common/include/arch/tile/rte_spinlock.h b/lib/librte_eal/common/include/arch/tile/rte_spinlock.h new file mode 100644 index 00000000..e91f99ee --- /dev/null +++ b/lib/librte_eal/common/include/arch/tile/rte_spinlock.h @@ -0,0 +1,92 @@ +/* + * BSD LICENSE + * + * Copyright (C) EZchip Semiconductor Ltd. 2015. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of EZchip Semiconductor nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _RTE_SPINLOCK_TILE_H_ +#define _RTE_SPINLOCK_TILE_H_ + +#ifndef RTE_FORCE_INTRINSICS +# error Platform must be built with CONFIG_RTE_FORCE_INTRINSICS +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include "generic/rte_spinlock.h" + +static inline int rte_tm_supported(void) +{ + return 0; +} + +static inline void +rte_spinlock_lock_tm(rte_spinlock_t *sl) +{ + rte_spinlock_lock(sl); /* fall-back */ +} + +static inline int +rte_spinlock_trylock_tm(rte_spinlock_t *sl) +{ + return rte_spinlock_trylock(sl); +} + +static inline void +rte_spinlock_unlock_tm(rte_spinlock_t *sl) +{ + rte_spinlock_unlock(sl); +} + +static inline void +rte_spinlock_recursive_lock_tm(rte_spinlock_recursive_t *slr) +{ + rte_spinlock_recursive_lock(slr); /* fall-back */ +} + +static inline void +rte_spinlock_recursive_unlock_tm(rte_spinlock_recursive_t *slr) +{ + rte_spinlock_recursive_unlock(slr); +} + +static inline int +rte_spinlock_recursive_trylock_tm(rte_spinlock_recursive_t *slr) +{ + return rte_spinlock_recursive_trylock(slr); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_SPINLOCK_TILE_H_ */ diff --git a/lib/librte_eal/common/include/arch/x86/rte_atomic.h b/lib/librte_eal/common/include/arch/x86/rte_atomic.h new file mode 100644 index 00000000..b20056b8 --- /dev/null +++ b/lib/librte_eal/common/include/arch/x86/rte_atomic.h @@ -0,0 +1,222 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_ATOMIC_X86_H_ +#define _RTE_ATOMIC_X86_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include "generic/rte_atomic.h" + +#if RTE_MAX_LCORE == 1 +#define MPLOCKED /**< No need to insert MP lock prefix. */ +#else +#define MPLOCKED "lock ; " /**< Insert MP lock prefix. */ +#endif + +#define rte_mb() _mm_mfence() + +#define rte_wmb() _mm_sfence() + +#define rte_rmb() _mm_lfence() + +#define rte_smp_mb() rte_mb() + +#define rte_smp_wmb() rte_compiler_barrier() + +#define rte_smp_rmb() rte_compiler_barrier() + +/*------------------------- 16 bit atomic operations -------------------------*/ + +#ifndef RTE_FORCE_INTRINSICS +static inline int +rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src) +{ + uint8_t res; + + asm volatile( + MPLOCKED + "cmpxchgw %[src], %[dst];" + "sete %[res];" + : [res] "=a" (res), /* output */ + [dst] "=m" (*dst) + : [src] "r" (src), /* input */ + "a" (exp), + "m" (*dst) + : "memory"); /* no-clobber list */ + return res; +} + +static inline int rte_atomic16_test_and_set(rte_atomic16_t *v) +{ + return rte_atomic16_cmpset((volatile uint16_t *)&v->cnt, 0, 1); +} + +static inline void +rte_atomic16_inc(rte_atomic16_t *v) +{ + asm volatile( + MPLOCKED + "incw %[cnt]" + : [cnt] "=m" (v->cnt) /* output */ + : "m" (v->cnt) /* input */ + ); +} + +static inline void +rte_atomic16_dec(rte_atomic16_t *v) +{ + asm volatile( + MPLOCKED + "decw %[cnt]" + : [cnt] "=m" (v->cnt) /* output */ + : "m" (v->cnt) /* input */ + ); +} + +static inline int rte_atomic16_inc_and_test(rte_atomic16_t *v) +{ + uint8_t ret; + + asm volatile( + MPLOCKED + "incw %[cnt] ; " + "sete %[ret]" + : [cnt] "+m" (v->cnt), /* output */ + [ret] "=qm" (ret) + ); + return ret != 0; +} + +static inline int rte_atomic16_dec_and_test(rte_atomic16_t *v) +{ + uint8_t ret; + + asm volatile(MPLOCKED + "decw %[cnt] ; " + "sete %[ret]" + : [cnt] "+m" (v->cnt), /* output */ + [ret] "=qm" (ret) + ); + return ret != 0; +} + +/*------------------------- 32 bit atomic operations -------------------------*/ + +static inline int +rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src) +{ + uint8_t res; + + asm volatile( + MPLOCKED + "cmpxchgl %[src], %[dst];" + "sete %[res];" + : [res] "=a" (res), /* output */ + [dst] "=m" (*dst) + : [src] "r" (src), /* input */ + "a" (exp), + "m" (*dst) + : "memory"); /* no-clobber list */ + return res; +} + +static inline int rte_atomic32_test_and_set(rte_atomic32_t *v) +{ + return rte_atomic32_cmpset((volatile uint32_t *)&v->cnt, 0, 1); +} + +static inline void +rte_atomic32_inc(rte_atomic32_t *v) +{ + asm volatile( + MPLOCKED + "incl %[cnt]" + : [cnt] "=m" (v->cnt) /* output */ + : "m" (v->cnt) /* input */ + ); +} + +static inline void +rte_atomic32_dec(rte_atomic32_t *v) +{ + asm volatile( + MPLOCKED + "decl %[cnt]" + : [cnt] "=m" (v->cnt) /* output */ + : "m" (v->cnt) /* input */ + ); +} + +static inline int rte_atomic32_inc_and_test(rte_atomic32_t *v) +{ + uint8_t ret; + + asm volatile( + MPLOCKED + "incl %[cnt] ; " + "sete %[ret]" + : [cnt] "+m" (v->cnt), /* output */ + [ret] "=qm" (ret) + ); + return ret != 0; +} + +static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v) +{ + uint8_t ret; + + asm volatile(MPLOCKED + "decl %[cnt] ; " + "sete %[ret]" + : [cnt] "+m" (v->cnt), /* output */ + [ret] "=qm" (ret) + ); + return ret != 0; +} +#endif + +#ifdef RTE_ARCH_I686 +#include "rte_atomic_32.h" +#else +#include "rte_atomic_64.h" +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_ATOMIC_X86_H_ */ diff --git a/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h b/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h new file mode 100644 index 00000000..400d8a96 --- /dev/null +++ b/lib/librte_eal/common/include/arch/x86/rte_atomic_32.h @@ -0,0 +1,222 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Inspired from FreeBSD src/sys/i386/include/atomic.h + * Copyright (c) 1998 Doug Rabson + * All rights reserved. + */ + +#ifndef _RTE_ATOMIC_I686_H_ +#define _RTE_ATOMIC_I686_H_ + +/*------------------------- 64 bit atomic operations -------------------------*/ + +#ifndef RTE_FORCE_INTRINSICS +static inline int +rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src) +{ + uint8_t res; + union { + struct { + uint32_t l32; + uint32_t h32; + }; + uint64_t u64; + } _exp, _src; + + _exp.u64 = exp; + _src.u64 = src; + +#ifndef __PIC__ + asm volatile ( + MPLOCKED + "cmpxchg8b (%[dst]);" + "setz %[res];" + : [res] "=a" (res) /* result in eax */ + : [dst] "S" (dst), /* esi */ + "b" (_src.l32), /* ebx */ + "c" (_src.h32), /* ecx */ + "a" (_exp.l32), /* eax */ + "d" (_exp.h32) /* edx */ + : "memory" ); /* no-clobber list */ +#else + asm volatile ( + "mov %%ebx, %%edi\n" + MPLOCKED + "cmpxchg8b (%[dst]);" + "setz %[res];" + "xchgl %%ebx, %%edi;\n" + : [res] "=a" (res) /* result in eax */ + : [dst] "S" (dst), /* esi */ + "D" (_src.l32), /* ebx */ + "c" (_src.h32), /* ecx */ + "a" (_exp.l32), /* eax */ + "d" (_exp.h32) /* edx */ + : "memory" ); /* no-clobber list */ +#endif + + return res; +} + +static inline void +rte_atomic64_init(rte_atomic64_t *v) +{ + int success = 0; + uint64_t tmp; + + while (success == 0) { + tmp = v->cnt; + success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, + tmp, 0); + } +} + +static inline int64_t +rte_atomic64_read(rte_atomic64_t *v) +{ + int success = 0; + uint64_t tmp; + + while (success == 0) { + tmp = v->cnt; + /* replace the value by itself */ + success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, + tmp, tmp); + } + return tmp; +} + +static inline void +rte_atomic64_set(rte_atomic64_t *v, int64_t new_value) +{ + int success = 0; + uint64_t tmp; + + while (success == 0) { + tmp = v->cnt; + success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, + tmp, new_value); + } +} + +static inline void +rte_atomic64_add(rte_atomic64_t *v, int64_t inc) +{ + int success = 0; + uint64_t tmp; + + while (success == 0) { + tmp = v->cnt; + success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, + tmp, tmp + inc); + } +} + +static inline void +rte_atomic64_sub(rte_atomic64_t *v, int64_t dec) +{ + int success = 0; + uint64_t tmp; + + while (success == 0) { + tmp = v->cnt; + success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, + tmp, tmp - dec); + } +} + +static inline void +rte_atomic64_inc(rte_atomic64_t *v) +{ + rte_atomic64_add(v, 1); +} + +static inline void +rte_atomic64_dec(rte_atomic64_t *v) +{ + rte_atomic64_sub(v, 1); +} + +static inline int64_t +rte_atomic64_add_return(rte_atomic64_t *v, int64_t inc) +{ + int success = 0; + uint64_t tmp; + + while (success == 0) { + tmp = v->cnt; + success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, + tmp, tmp + inc); + } + + return tmp + inc; +} + +static inline int64_t +rte_atomic64_sub_return(rte_atomic64_t *v, int64_t dec) +{ + int success = 0; + uint64_t tmp; + + while (success == 0) { + tmp = v->cnt; + success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, + tmp, tmp - dec); + } + + return tmp - dec; +} + +static inline int rte_atomic64_inc_and_test(rte_atomic64_t *v) +{ + return rte_atomic64_add_return(v, 1) == 0; +} + +static inline int rte_atomic64_dec_and_test(rte_atomic64_t *v) +{ + return rte_atomic64_sub_return(v, 1) == 0; +} + +static inline int rte_atomic64_test_and_set(rte_atomic64_t *v) +{ + return rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, 0, 1); +} + +static inline void rte_atomic64_clear(rte_atomic64_t *v) +{ + rte_atomic64_set(v, 0); +} +#endif + +#endif /* _RTE_ATOMIC_I686_H_ */ diff --git a/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h b/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h new file mode 100644 index 00000000..4de66000 --- /dev/null +++ b/lib/librte_eal/common/include/arch/x86/rte_atomic_64.h @@ -0,0 +1,191 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/* + * Inspired from FreeBSD src/sys/amd64/include/atomic.h + * Copyright (c) 1998 Doug Rabson + * All rights reserved. + */ + +#ifndef _RTE_ATOMIC_X86_64_H_ +#define _RTE_ATOMIC_X86_64_H_ + +/*------------------------- 64 bit atomic operations -------------------------*/ + +#ifndef RTE_FORCE_INTRINSICS +static inline int +rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src) +{ + uint8_t res; + + + asm volatile( + MPLOCKED + "cmpxchgq %[src], %[dst];" + "sete %[res];" + : [res] "=a" (res), /* output */ + [dst] "=m" (*dst) + : [src] "r" (src), /* input */ + "a" (exp), + "m" (*dst) + : "memory"); /* no-clobber list */ + + return res; +} + +static inline void +rte_atomic64_init(rte_atomic64_t *v) +{ + v->cnt = 0; +} + +static inline int64_t +rte_atomic64_read(rte_atomic64_t *v) +{ + return v->cnt; +} + +static inline void +rte_atomic64_set(rte_atomic64_t *v, int64_t new_value) +{ + v->cnt = new_value; +} + +static inline void +rte_atomic64_add(rte_atomic64_t *v, int64_t inc) +{ + asm volatile( + MPLOCKED + "addq %[inc], %[cnt]" + : [cnt] "=m" (v->cnt) /* output */ + : [inc] "ir" (inc), /* input */ + "m" (v->cnt) + ); +} + +static inline void +rte_atomic64_sub(rte_atomic64_t *v, int64_t dec) +{ + asm volatile( + MPLOCKED + "subq %[dec], %[cnt]" + : [cnt] "=m" (v->cnt) /* output */ + : [dec] "ir" (dec), /* input */ + "m" (v->cnt) + ); +} + +static inline void +rte_atomic64_inc(rte_atomic64_t *v) +{ + asm volatile( + MPLOCKED + "incq %[cnt]" + : [cnt] "=m" (v->cnt) /* output */ + : "m" (v->cnt) /* input */ + ); +} + +static inline void +rte_atomic64_dec(rte_atomic64_t *v) +{ + asm volatile( + MPLOCKED + "decq %[cnt]" + : [cnt] "=m" (v->cnt) /* output */ + : "m" (v->cnt) /* input */ + ); +} + +static inline int64_t +rte_atomic64_add_return(rte_atomic64_t *v, int64_t inc) +{ + int64_t prev = inc; + + asm volatile( + MPLOCKED + "xaddq %[prev], %[cnt]" + : [prev] "+r" (prev), /* output */ + [cnt] "=m" (v->cnt) + : "m" (v->cnt) /* input */ + ); + return prev + inc; +} + +static inline int64_t +rte_atomic64_sub_return(rte_atomic64_t *v, int64_t dec) +{ + return rte_atomic64_add_return(v, -dec); +} + +static inline int rte_atomic64_inc_and_test(rte_atomic64_t *v) +{ + uint8_t ret; + + asm volatile( + MPLOCKED + "incq %[cnt] ; " + "sete %[ret]" + : [cnt] "+m" (v->cnt), /* output */ + [ret] "=qm" (ret) + ); + + return ret != 0; +} + +static inline int rte_atomic64_dec_and_test(rte_atomic64_t *v) +{ + uint8_t ret; + + asm volatile( + MPLOCKED + "decq %[cnt] ; " + "sete %[ret]" + : [cnt] "+m" (v->cnt), /* output */ + [ret] "=qm" (ret) + ); + return ret != 0; +} + +static inline int rte_atomic64_test_and_set(rte_atomic64_t *v) +{ + return rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, 0, 1); +} + +static inline void rte_atomic64_clear(rte_atomic64_t *v) +{ + v->cnt = 0; +} +#endif + +#endif /* _RTE_ATOMIC_X86_64_H_ */ diff --git a/lib/librte_eal/common/include/arch/x86/rte_byteorder.h b/lib/librte_eal/common/include/arch/x86/rte_byteorder.h new file mode 100644 index 00000000..ffdb6ef5 --- /dev/null +++ b/lib/librte_eal/common/include/arch/x86/rte_byteorder.h @@ -0,0 +1,125 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_BYTEORDER_X86_H_ +#define _RTE_BYTEORDER_X86_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_byteorder.h" + +#ifndef RTE_BYTE_ORDER +#define RTE_BYTE_ORDER RTE_LITTLE_ENDIAN +#endif + +/* + * An architecture-optimized byte swap for a 16-bit value. + * + * Do not use this function directly. The preferred function is rte_bswap16(). + */ +static inline uint16_t rte_arch_bswap16(uint16_t _x) +{ + register uint16_t x = _x; + asm volatile ("xchgb %b[x1],%h[x2]" + : [x1] "=Q" (x) + : [x2] "0" (x) + ); + return x; +} + +/* + * An architecture-optimized byte swap for a 32-bit value. + * + * Do not use this function directly. The preferred function is rte_bswap32(). + */ +static inline uint32_t rte_arch_bswap32(uint32_t _x) +{ + register uint32_t x = _x; + asm volatile ("bswap %[x]" + : [x] "+r" (x) + ); + return x; +} + +#ifndef RTE_FORCE_INTRINSICS +#define rte_bswap16(x) ((uint16_t)(__builtin_constant_p(x) ? \ + rte_constant_bswap16(x) : \ + rte_arch_bswap16(x))) + +#define rte_bswap32(x) ((uint32_t)(__builtin_constant_p(x) ? \ + rte_constant_bswap32(x) : \ + rte_arch_bswap32(x))) + +#define rte_bswap64(x) ((uint64_t)(__builtin_constant_p(x) ? \ + rte_constant_bswap64(x) : \ + rte_arch_bswap64(x))) +#else +/* + * __builtin_bswap16 is only available gcc 4.8 and upwards + */ +#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 8) +#define rte_bswap16(x) ((uint16_t)(__builtin_constant_p(x) ? \ + rte_constant_bswap16(x) : \ + rte_arch_bswap16(x))) +#endif +#endif + +#define rte_cpu_to_le_16(x) (x) +#define rte_cpu_to_le_32(x) (x) +#define rte_cpu_to_le_64(x) (x) + +#define rte_cpu_to_be_16(x) rte_bswap16(x) +#define rte_cpu_to_be_32(x) rte_bswap32(x) +#define rte_cpu_to_be_64(x) rte_bswap64(x) + +#define rte_le_to_cpu_16(x) (x) +#define rte_le_to_cpu_32(x) (x) +#define rte_le_to_cpu_64(x) (x) + +#define rte_be_to_cpu_16(x) rte_bswap16(x) +#define rte_be_to_cpu_32(x) rte_bswap32(x) +#define rte_be_to_cpu_64(x) rte_bswap64(x) + +#ifdef RTE_ARCH_I686 +#include "rte_byteorder_32.h" +#else +#include "rte_byteorder_64.h" +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_BYTEORDER_X86_H_ */ diff --git a/lib/librte_eal/common/include/arch/x86/rte_byteorder_32.h b/lib/librte_eal/common/include/arch/x86/rte_byteorder_32.h new file mode 100644 index 00000000..51c306f8 --- /dev/null +++ b/lib/librte_eal/common/include/arch/x86/rte_byteorder_32.h @@ -0,0 +1,51 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_BYTEORDER_I686_H_ +#define _RTE_BYTEORDER_I686_H_ + +/* + * An architecture-optimized byte swap for a 64-bit value. + * + * Do not use this function directly. The preferred function is rte_bswap64(). + */ +/* Compat./Leg. mode */ +static inline uint64_t rte_arch_bswap64(uint64_t x) +{ + uint64_t ret = 0; + ret |= ((uint64_t)rte_arch_bswap32(x & 0xffffffffUL) << 32); + ret |= ((uint64_t)rte_arch_bswap32((x >> 32) & 0xffffffffUL)); + return ret; +} + +#endif /* _RTE_BYTEORDER_I686_H_ */ diff --git a/lib/librte_eal/common/include/arch/x86/rte_byteorder_64.h b/lib/librte_eal/common/include/arch/x86/rte_byteorder_64.h new file mode 100644 index 00000000..dda572bd --- /dev/null +++ b/lib/librte_eal/common/include/arch/x86/rte_byteorder_64.h @@ -0,0 +1,52 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_BYTEORDER_X86_64_H_ +#define _RTE_BYTEORDER_X86_64_H_ + +/* + * An architecture-optimized byte swap for a 64-bit value. + * + * Do not use this function directly. The preferred function is rte_bswap64(). + */ +/* 64-bit mode */ +static inline uint64_t rte_arch_bswap64(uint64_t _x) +{ + register uint64_t x = _x; + asm volatile ("bswap %[x]" + : [x] "+r" (x) + ); + return x; +} + +#endif /* _RTE_BYTEORDER_X86_64_H_ */ diff --git a/lib/librte_eal/common/include/arch/x86/rte_cpuflags.h b/lib/librte_eal/common/include/arch/x86/rte_cpuflags.h new file mode 100644 index 00000000..26204fab --- /dev/null +++ b/lib/librte_eal/common/include/arch/x86/rte_cpuflags.h @@ -0,0 +1,153 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_CPUFLAGS_X86_64_H_ +#define _RTE_CPUFLAGS_X86_64_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +enum rte_cpu_flag_t { + /* (EAX 01h) ECX features*/ + RTE_CPUFLAG_SSE3 = 0, /**< SSE3 */ + RTE_CPUFLAG_PCLMULQDQ, /**< PCLMULQDQ */ + RTE_CPUFLAG_DTES64, /**< DTES64 */ + RTE_CPUFLAG_MONITOR, /**< MONITOR */ + RTE_CPUFLAG_DS_CPL, /**< DS_CPL */ + RTE_CPUFLAG_VMX, /**< VMX */ + RTE_CPUFLAG_SMX, /**< SMX */ + RTE_CPUFLAG_EIST, /**< EIST */ + RTE_CPUFLAG_TM2, /**< TM2 */ + RTE_CPUFLAG_SSSE3, /**< SSSE3 */ + RTE_CPUFLAG_CNXT_ID, /**< CNXT_ID */ + RTE_CPUFLAG_FMA, /**< FMA */ + RTE_CPUFLAG_CMPXCHG16B, /**< CMPXCHG16B */ + RTE_CPUFLAG_XTPR, /**< XTPR */ + RTE_CPUFLAG_PDCM, /**< PDCM */ + RTE_CPUFLAG_PCID, /**< PCID */ + RTE_CPUFLAG_DCA, /**< DCA */ + RTE_CPUFLAG_SSE4_1, /**< SSE4_1 */ + RTE_CPUFLAG_SSE4_2, /**< SSE4_2 */ + RTE_CPUFLAG_X2APIC, /**< X2APIC */ + RTE_CPUFLAG_MOVBE, /**< MOVBE */ + RTE_CPUFLAG_POPCNT, /**< POPCNT */ + RTE_CPUFLAG_TSC_DEADLINE, /**< TSC_DEADLINE */ + RTE_CPUFLAG_AES, /**< AES */ + RTE_CPUFLAG_XSAVE, /**< XSAVE */ + RTE_CPUFLAG_OSXSAVE, /**< OSXSAVE */ + RTE_CPUFLAG_AVX, /**< AVX */ + RTE_CPUFLAG_F16C, /**< F16C */ + RTE_CPUFLAG_RDRAND, /**< RDRAND */ + + /* (EAX 01h) EDX features */ + RTE_CPUFLAG_FPU, /**< FPU */ + RTE_CPUFLAG_VME, /**< VME */ + RTE_CPUFLAG_DE, /**< DE */ + RTE_CPUFLAG_PSE, /**< PSE */ + RTE_CPUFLAG_TSC, /**< TSC */ + RTE_CPUFLAG_MSR, /**< MSR */ + RTE_CPUFLAG_PAE, /**< PAE */ + RTE_CPUFLAG_MCE, /**< MCE */ + RTE_CPUFLAG_CX8, /**< CX8 */ + RTE_CPUFLAG_APIC, /**< APIC */ + RTE_CPUFLAG_SEP, /**< SEP */ + RTE_CPUFLAG_MTRR, /**< MTRR */ + RTE_CPUFLAG_PGE, /**< PGE */ + RTE_CPUFLAG_MCA, /**< MCA */ + RTE_CPUFLAG_CMOV, /**< CMOV */ + RTE_CPUFLAG_PAT, /**< PAT */ + RTE_CPUFLAG_PSE36, /**< PSE36 */ + RTE_CPUFLAG_PSN, /**< PSN */ + RTE_CPUFLAG_CLFSH, /**< CLFSH */ + RTE_CPUFLAG_DS, /**< DS */ + RTE_CPUFLAG_ACPI, /**< ACPI */ + RTE_CPUFLAG_MMX, /**< MMX */ + RTE_CPUFLAG_FXSR, /**< FXSR */ + RTE_CPUFLAG_SSE, /**< SSE */ + RTE_CPUFLAG_SSE2, /**< SSE2 */ + RTE_CPUFLAG_SS, /**< SS */ + RTE_CPUFLAG_HTT, /**< HTT */ + RTE_CPUFLAG_TM, /**< TM */ + RTE_CPUFLAG_PBE, /**< PBE */ + + /* (EAX 06h) EAX features */ + RTE_CPUFLAG_DIGTEMP, /**< DIGTEMP */ + RTE_CPUFLAG_TRBOBST, /**< TRBOBST */ + RTE_CPUFLAG_ARAT, /**< ARAT */ + RTE_CPUFLAG_PLN, /**< PLN */ + RTE_CPUFLAG_ECMD, /**< ECMD */ + RTE_CPUFLAG_PTM, /**< PTM */ + + /* (EAX 06h) ECX features */ + RTE_CPUFLAG_MPERF_APERF_MSR, /**< MPERF_APERF_MSR */ + RTE_CPUFLAG_ACNT2, /**< ACNT2 */ + RTE_CPUFLAG_ENERGY_EFF, /**< ENERGY_EFF */ + + /* (EAX 07h, ECX 0h) EBX features */ + RTE_CPUFLAG_FSGSBASE, /**< FSGSBASE */ + RTE_CPUFLAG_BMI1, /**< BMI1 */ + RTE_CPUFLAG_HLE, /**< Hardware Lock elision */ + RTE_CPUFLAG_AVX2, /**< AVX2 */ + RTE_CPUFLAG_SMEP, /**< SMEP */ + RTE_CPUFLAG_BMI2, /**< BMI2 */ + RTE_CPUFLAG_ERMS, /**< ERMS */ + RTE_CPUFLAG_INVPCID, /**< INVPCID */ + RTE_CPUFLAG_RTM, /**< Transactional memory */ + RTE_CPUFLAG_AVX512F, /**< AVX512F */ + + /* (EAX 80000001h) ECX features */ + RTE_CPUFLAG_LAHF_SAHF, /**< LAHF_SAHF */ + RTE_CPUFLAG_LZCNT, /**< LZCNT */ + + /* (EAX 80000001h) EDX features */ + RTE_CPUFLAG_SYSCALL, /**< SYSCALL */ + RTE_CPUFLAG_XD, /**< XD */ + RTE_CPUFLAG_1GB_PG, /**< 1GB_PG */ + RTE_CPUFLAG_RDTSCP, /**< RDTSCP */ + RTE_CPUFLAG_EM64T, /**< EM64T */ + + /* (EAX 80000007h) EDX features */ + RTE_CPUFLAG_INVTSC, /**< INVTSC */ + + /* The last item */ + RTE_CPUFLAG_NUMFLAGS, /**< This should always be the last! */ +}; + +#include "generic/rte_cpuflags.h" + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_CPUFLAGS_X86_64_H_ */ diff --git a/lib/librte_eal/common/include/arch/x86/rte_cycles.h b/lib/librte_eal/common/include/arch/x86/rte_cycles.h new file mode 100644 index 00000000..6e3c7d89 --- /dev/null +++ b/lib/librte_eal/common/include/arch/x86/rte_cycles.h @@ -0,0 +1,121 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/* BSD LICENSE + * + * Copyright(c) 2013 6WIND. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_CYCLES_X86_64_H_ +#define _RTE_CYCLES_X86_64_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_cycles.h" + +#ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT +/* Global switch to use VMWARE mapping of TSC instead of RDTSC */ +extern int rte_cycles_vmware_tsc_map; +#include +#endif + +static inline uint64_t +rte_rdtsc(void) +{ + union { + uint64_t tsc_64; + struct { + uint32_t lo_32; + uint32_t hi_32; + }; + } tsc; + +#ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT + if (unlikely(rte_cycles_vmware_tsc_map)) { + /* ecx = 0x10000 corresponds to the physical TSC for VMware */ + asm volatile("rdpmc" : + "=a" (tsc.lo_32), + "=d" (tsc.hi_32) : + "c"(0x10000)); + return tsc.tsc_64; + } +#endif + + asm volatile("rdtsc" : + "=a" (tsc.lo_32), + "=d" (tsc.hi_32)); + return tsc.tsc_64; +} + +static inline uint64_t +rte_rdtsc_precise(void) +{ + rte_mb(); + return rte_rdtsc(); +} + +static inline uint64_t +rte_get_tsc_cycles(void) { return rte_rdtsc(); } + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_CYCLES_X86_64_H_ */ diff --git a/lib/librte_eal/common/include/arch/x86/rte_memcpy.h b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h new file mode 100644 index 00000000..f463ab30 --- /dev/null +++ b/lib/librte_eal/common/include/arch/x86/rte_memcpy.h @@ -0,0 +1,884 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_MEMCPY_X86_64_H_ +#define _RTE_MEMCPY_X86_64_H_ + +/** + * @file + * + * Functions for SSE/AVX/AVX2/AVX512 implementation of memcpy(). + */ + +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Copy bytes from one location to another. The locations must not overlap. + * + * @note This is implemented as a macro, so it's address should not be taken + * and care is needed as parameter expressions may be evaluated multiple times. + * + * @param dst + * Pointer to the destination of the data. + * @param src + * Pointer to the source data. + * @param n + * Number of bytes to copy. + * @return + * Pointer to the destination data. + */ +static inline void * +rte_memcpy(void *dst, const void *src, size_t n) __attribute__((always_inline)); + +#ifdef RTE_MACHINE_CPUFLAG_AVX512F + +/** + * AVX512 implementation below + */ + +/** + * Copy 16 bytes from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov16(uint8_t *dst, const uint8_t *src) +{ + __m128i xmm0; + + xmm0 = _mm_loadu_si128((const __m128i *)src); + _mm_storeu_si128((__m128i *)dst, xmm0); +} + +/** + * Copy 32 bytes from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov32(uint8_t *dst, const uint8_t *src) +{ + __m256i ymm0; + + ymm0 = _mm256_loadu_si256((const __m256i *)src); + _mm256_storeu_si256((__m256i *)dst, ymm0); +} + +/** + * Copy 64 bytes from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov64(uint8_t *dst, const uint8_t *src) +{ + __m512i zmm0; + + zmm0 = _mm512_loadu_si512((const void *)src); + _mm512_storeu_si512((void *)dst, zmm0); +} + +/** + * Copy 128 bytes from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov128(uint8_t *dst, const uint8_t *src) +{ + rte_mov64(dst + 0 * 64, src + 0 * 64); + rte_mov64(dst + 1 * 64, src + 1 * 64); +} + +/** + * Copy 256 bytes from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov256(uint8_t *dst, const uint8_t *src) +{ + rte_mov64(dst + 0 * 64, src + 0 * 64); + rte_mov64(dst + 1 * 64, src + 1 * 64); + rte_mov64(dst + 2 * 64, src + 2 * 64); + rte_mov64(dst + 3 * 64, src + 3 * 64); +} + +/** + * Copy 128-byte blocks from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov128blocks(uint8_t *dst, const uint8_t *src, size_t n) +{ + __m512i zmm0, zmm1; + + while (n >= 128) { + zmm0 = _mm512_loadu_si512((const void *)(src + 0 * 64)); + n -= 128; + zmm1 = _mm512_loadu_si512((const void *)(src + 1 * 64)); + src = src + 128; + _mm512_storeu_si512((void *)(dst + 0 * 64), zmm0); + _mm512_storeu_si512((void *)(dst + 1 * 64), zmm1); + dst = dst + 128; + } +} + +/** + * Copy 512-byte blocks from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov512blocks(uint8_t *dst, const uint8_t *src, size_t n) +{ + __m512i zmm0, zmm1, zmm2, zmm3, zmm4, zmm5, zmm6, zmm7; + + while (n >= 512) { + zmm0 = _mm512_loadu_si512((const void *)(src + 0 * 64)); + n -= 512; + zmm1 = _mm512_loadu_si512((const void *)(src + 1 * 64)); + zmm2 = _mm512_loadu_si512((const void *)(src + 2 * 64)); + zmm3 = _mm512_loadu_si512((const void *)(src + 3 * 64)); + zmm4 = _mm512_loadu_si512((const void *)(src + 4 * 64)); + zmm5 = _mm512_loadu_si512((const void *)(src + 5 * 64)); + zmm6 = _mm512_loadu_si512((const void *)(src + 6 * 64)); + zmm7 = _mm512_loadu_si512((const void *)(src + 7 * 64)); + src = src + 512; + _mm512_storeu_si512((void *)(dst + 0 * 64), zmm0); + _mm512_storeu_si512((void *)(dst + 1 * 64), zmm1); + _mm512_storeu_si512((void *)(dst + 2 * 64), zmm2); + _mm512_storeu_si512((void *)(dst + 3 * 64), zmm3); + _mm512_storeu_si512((void *)(dst + 4 * 64), zmm4); + _mm512_storeu_si512((void *)(dst + 5 * 64), zmm5); + _mm512_storeu_si512((void *)(dst + 6 * 64), zmm6); + _mm512_storeu_si512((void *)(dst + 7 * 64), zmm7); + dst = dst + 512; + } +} + +static inline void * +rte_memcpy(void *dst, const void *src, size_t n) +{ + uintptr_t dstu = (uintptr_t)dst; + uintptr_t srcu = (uintptr_t)src; + void *ret = dst; + size_t dstofss; + size_t bits; + + /** + * Copy less than 16 bytes + */ + if (n < 16) { + if (n & 0x01) { + *(uint8_t *)dstu = *(const uint8_t *)srcu; + srcu = (uintptr_t)((const uint8_t *)srcu + 1); + dstu = (uintptr_t)((uint8_t *)dstu + 1); + } + if (n & 0x02) { + *(uint16_t *)dstu = *(const uint16_t *)srcu; + srcu = (uintptr_t)((const uint16_t *)srcu + 1); + dstu = (uintptr_t)((uint16_t *)dstu + 1); + } + if (n & 0x04) { + *(uint32_t *)dstu = *(const uint32_t *)srcu; + srcu = (uintptr_t)((const uint32_t *)srcu + 1); + dstu = (uintptr_t)((uint32_t *)dstu + 1); + } + if (n & 0x08) + *(uint64_t *)dstu = *(const uint64_t *)srcu; + return ret; + } + + /** + * Fast way when copy size doesn't exceed 512 bytes + */ + if (n <= 32) { + rte_mov16((uint8_t *)dst, (const uint8_t *)src); + rte_mov16((uint8_t *)dst - 16 + n, + (const uint8_t *)src - 16 + n); + return ret; + } + if (n <= 64) { + rte_mov32((uint8_t *)dst, (const uint8_t *)src); + rte_mov32((uint8_t *)dst - 32 + n, + (const uint8_t *)src - 32 + n); + return ret; + } + if (n <= 512) { + if (n >= 256) { + n -= 256; + rte_mov256((uint8_t *)dst, (const uint8_t *)src); + src = (const uint8_t *)src + 256; + dst = (uint8_t *)dst + 256; + } + if (n >= 128) { + n -= 128; + rte_mov128((uint8_t *)dst, (const uint8_t *)src); + src = (const uint8_t *)src + 128; + dst = (uint8_t *)dst + 128; + } +COPY_BLOCK_128_BACK63: + if (n > 64) { + rte_mov64((uint8_t *)dst, (const uint8_t *)src); + rte_mov64((uint8_t *)dst - 64 + n, + (const uint8_t *)src - 64 + n); + return ret; + } + if (n > 0) + rte_mov64((uint8_t *)dst - 64 + n, + (const uint8_t *)src - 64 + n); + return ret; + } + + /** + * Make store aligned when copy size exceeds 512 bytes + */ + dstofss = ((uintptr_t)dst & 0x3F); + if (dstofss > 0) { + dstofss = 64 - dstofss; + n -= dstofss; + rte_mov64((uint8_t *)dst, (const uint8_t *)src); + src = (const uint8_t *)src + dstofss; + dst = (uint8_t *)dst + dstofss; + } + + /** + * Copy 512-byte blocks. + * Use copy block function for better instruction order control, + * which is important when load is unaligned. + */ + rte_mov512blocks((uint8_t *)dst, (const uint8_t *)src, n); + bits = n; + n = n & 511; + bits -= n; + src = (const uint8_t *)src + bits; + dst = (uint8_t *)dst + bits; + + /** + * Copy 128-byte blocks. + * Use copy block function for better instruction order control, + * which is important when load is unaligned. + */ + if (n >= 128) { + rte_mov128blocks((uint8_t *)dst, (const uint8_t *)src, n); + bits = n; + n = n & 127; + bits -= n; + src = (const uint8_t *)src + bits; + dst = (uint8_t *)dst + bits; + } + + /** + * Copy whatever left + */ + goto COPY_BLOCK_128_BACK63; +} + +#elif defined RTE_MACHINE_CPUFLAG_AVX2 + +/** + * AVX2 implementation below + */ + +/** + * Copy 16 bytes from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov16(uint8_t *dst, const uint8_t *src) +{ + __m128i xmm0; + + xmm0 = _mm_loadu_si128((const __m128i *)src); + _mm_storeu_si128((__m128i *)dst, xmm0); +} + +/** + * Copy 32 bytes from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov32(uint8_t *dst, const uint8_t *src) +{ + __m256i ymm0; + + ymm0 = _mm256_loadu_si256((const __m256i *)src); + _mm256_storeu_si256((__m256i *)dst, ymm0); +} + +/** + * Copy 64 bytes from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov64(uint8_t *dst, const uint8_t *src) +{ + rte_mov32((uint8_t *)dst + 0 * 32, (const uint8_t *)src + 0 * 32); + rte_mov32((uint8_t *)dst + 1 * 32, (const uint8_t *)src + 1 * 32); +} + +/** + * Copy 128 bytes from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov128(uint8_t *dst, const uint8_t *src) +{ + rte_mov32((uint8_t *)dst + 0 * 32, (const uint8_t *)src + 0 * 32); + rte_mov32((uint8_t *)dst + 1 * 32, (const uint8_t *)src + 1 * 32); + rte_mov32((uint8_t *)dst + 2 * 32, (const uint8_t *)src + 2 * 32); + rte_mov32((uint8_t *)dst + 3 * 32, (const uint8_t *)src + 3 * 32); +} + +/** + * Copy 256 bytes from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov256(uint8_t *dst, const uint8_t *src) +{ + rte_mov32((uint8_t *)dst + 0 * 32, (const uint8_t *)src + 0 * 32); + rte_mov32((uint8_t *)dst + 1 * 32, (const uint8_t *)src + 1 * 32); + rte_mov32((uint8_t *)dst + 2 * 32, (const uint8_t *)src + 2 * 32); + rte_mov32((uint8_t *)dst + 3 * 32, (const uint8_t *)src + 3 * 32); + rte_mov32((uint8_t *)dst + 4 * 32, (const uint8_t *)src + 4 * 32); + rte_mov32((uint8_t *)dst + 5 * 32, (const uint8_t *)src + 5 * 32); + rte_mov32((uint8_t *)dst + 6 * 32, (const uint8_t *)src + 6 * 32); + rte_mov32((uint8_t *)dst + 7 * 32, (const uint8_t *)src + 7 * 32); +} + +/** + * Copy 64-byte blocks from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov64blocks(uint8_t *dst, const uint8_t *src, size_t n) +{ + __m256i ymm0, ymm1; + + while (n >= 64) { + ymm0 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 0 * 32)); + n -= 64; + ymm1 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 1 * 32)); + src = (const uint8_t *)src + 64; + _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 0 * 32), ymm0); + _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 1 * 32), ymm1); + dst = (uint8_t *)dst + 64; + } +} + +/** + * Copy 256-byte blocks from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov256blocks(uint8_t *dst, const uint8_t *src, size_t n) +{ + __m256i ymm0, ymm1, ymm2, ymm3, ymm4, ymm5, ymm6, ymm7; + + while (n >= 256) { + ymm0 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 0 * 32)); + n -= 256; + ymm1 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 1 * 32)); + ymm2 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 2 * 32)); + ymm3 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 3 * 32)); + ymm4 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 4 * 32)); + ymm5 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 5 * 32)); + ymm6 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 6 * 32)); + ymm7 = _mm256_loadu_si256((const __m256i *)((const uint8_t *)src + 7 * 32)); + src = (const uint8_t *)src + 256; + _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 0 * 32), ymm0); + _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 1 * 32), ymm1); + _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 2 * 32), ymm2); + _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 3 * 32), ymm3); + _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 4 * 32), ymm4); + _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 5 * 32), ymm5); + _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 6 * 32), ymm6); + _mm256_storeu_si256((__m256i *)((uint8_t *)dst + 7 * 32), ymm7); + dst = (uint8_t *)dst + 256; + } +} + +static inline void * +rte_memcpy(void *dst, const void *src, size_t n) +{ + uintptr_t dstu = (uintptr_t)dst; + uintptr_t srcu = (uintptr_t)src; + void *ret = dst; + size_t dstofss; + size_t bits; + + /** + * Copy less than 16 bytes + */ + if (n < 16) { + if (n & 0x01) { + *(uint8_t *)dstu = *(const uint8_t *)srcu; + srcu = (uintptr_t)((const uint8_t *)srcu + 1); + dstu = (uintptr_t)((uint8_t *)dstu + 1); + } + if (n & 0x02) { + *(uint16_t *)dstu = *(const uint16_t *)srcu; + srcu = (uintptr_t)((const uint16_t *)srcu + 1); + dstu = (uintptr_t)((uint16_t *)dstu + 1); + } + if (n & 0x04) { + *(uint32_t *)dstu = *(const uint32_t *)srcu; + srcu = (uintptr_t)((const uint32_t *)srcu + 1); + dstu = (uintptr_t)((uint32_t *)dstu + 1); + } + if (n & 0x08) { + *(uint64_t *)dstu = *(const uint64_t *)srcu; + } + return ret; + } + + /** + * Fast way when copy size doesn't exceed 512 bytes + */ + if (n <= 32) { + rte_mov16((uint8_t *)dst, (const uint8_t *)src); + rte_mov16((uint8_t *)dst - 16 + n, (const uint8_t *)src - 16 + n); + return ret; + } + if (n <= 64) { + rte_mov32((uint8_t *)dst, (const uint8_t *)src); + rte_mov32((uint8_t *)dst - 32 + n, (const uint8_t *)src - 32 + n); + return ret; + } + if (n <= 512) { + if (n >= 256) { + n -= 256; + rte_mov256((uint8_t *)dst, (const uint8_t *)src); + src = (const uint8_t *)src + 256; + dst = (uint8_t *)dst + 256; + } + if (n >= 128) { + n -= 128; + rte_mov128((uint8_t *)dst, (const uint8_t *)src); + src = (const uint8_t *)src + 128; + dst = (uint8_t *)dst + 128; + } + if (n >= 64) { + n -= 64; + rte_mov64((uint8_t *)dst, (const uint8_t *)src); + src = (const uint8_t *)src + 64; + dst = (uint8_t *)dst + 64; + } +COPY_BLOCK_64_BACK31: + if (n > 32) { + rte_mov32((uint8_t *)dst, (const uint8_t *)src); + rte_mov32((uint8_t *)dst - 32 + n, (const uint8_t *)src - 32 + n); + return ret; + } + if (n > 0) { + rte_mov32((uint8_t *)dst - 32 + n, (const uint8_t *)src - 32 + n); + } + return ret; + } + + /** + * Make store aligned when copy size exceeds 512 bytes + */ + dstofss = (uintptr_t)dst & 0x1F; + if (dstofss > 0) { + dstofss = 32 - dstofss; + n -= dstofss; + rte_mov32((uint8_t *)dst, (const uint8_t *)src); + src = (const uint8_t *)src + dstofss; + dst = (uint8_t *)dst + dstofss; + } + + /** + * Copy 256-byte blocks. + * Use copy block function for better instruction order control, + * which is important when load is unaligned. + */ + rte_mov256blocks((uint8_t *)dst, (const uint8_t *)src, n); + bits = n; + n = n & 255; + bits -= n; + src = (const uint8_t *)src + bits; + dst = (uint8_t *)dst + bits; + + /** + * Copy 64-byte blocks. + * Use copy block function for better instruction order control, + * which is important when load is unaligned. + */ + if (n >= 64) { + rte_mov64blocks((uint8_t *)dst, (const uint8_t *)src, n); + bits = n; + n = n & 63; + bits -= n; + src = (const uint8_t *)src + bits; + dst = (uint8_t *)dst + bits; + } + + /** + * Copy whatever left + */ + goto COPY_BLOCK_64_BACK31; +} + +#else /* RTE_MACHINE_CPUFLAG */ + +/** + * SSE & AVX implementation below + */ + +/** + * Copy 16 bytes from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov16(uint8_t *dst, const uint8_t *src) +{ + __m128i xmm0; + + xmm0 = _mm_loadu_si128((const __m128i *)(const __m128i *)src); + _mm_storeu_si128((__m128i *)dst, xmm0); +} + +/** + * Copy 32 bytes from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov32(uint8_t *dst, const uint8_t *src) +{ + rte_mov16((uint8_t *)dst + 0 * 16, (const uint8_t *)src + 0 * 16); + rte_mov16((uint8_t *)dst + 1 * 16, (const uint8_t *)src + 1 * 16); +} + +/** + * Copy 64 bytes from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov64(uint8_t *dst, const uint8_t *src) +{ + rte_mov16((uint8_t *)dst + 0 * 16, (const uint8_t *)src + 0 * 16); + rte_mov16((uint8_t *)dst + 1 * 16, (const uint8_t *)src + 1 * 16); + rte_mov16((uint8_t *)dst + 2 * 16, (const uint8_t *)src + 2 * 16); + rte_mov16((uint8_t *)dst + 3 * 16, (const uint8_t *)src + 3 * 16); +} + +/** + * Copy 128 bytes from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov128(uint8_t *dst, const uint8_t *src) +{ + rte_mov16((uint8_t *)dst + 0 * 16, (const uint8_t *)src + 0 * 16); + rte_mov16((uint8_t *)dst + 1 * 16, (const uint8_t *)src + 1 * 16); + rte_mov16((uint8_t *)dst + 2 * 16, (const uint8_t *)src + 2 * 16); + rte_mov16((uint8_t *)dst + 3 * 16, (const uint8_t *)src + 3 * 16); + rte_mov16((uint8_t *)dst + 4 * 16, (const uint8_t *)src + 4 * 16); + rte_mov16((uint8_t *)dst + 5 * 16, (const uint8_t *)src + 5 * 16); + rte_mov16((uint8_t *)dst + 6 * 16, (const uint8_t *)src + 6 * 16); + rte_mov16((uint8_t *)dst + 7 * 16, (const uint8_t *)src + 7 * 16); +} + +/** + * Copy 256 bytes from one location to another, + * locations should not overlap. + */ +static inline void +rte_mov256(uint8_t *dst, const uint8_t *src) +{ + rte_mov16((uint8_t *)dst + 0 * 16, (const uint8_t *)src + 0 * 16); + rte_mov16((uint8_t *)dst + 1 * 16, (const uint8_t *)src + 1 * 16); + rte_mov16((uint8_t *)dst + 2 * 16, (const uint8_t *)src + 2 * 16); + rte_mov16((uint8_t *)dst + 3 * 16, (const uint8_t *)src + 3 * 16); + rte_mov16((uint8_t *)dst + 4 * 16, (const uint8_t *)src + 4 * 16); + rte_mov16((uint8_t *)dst + 5 * 16, (const uint8_t *)src + 5 * 16); + rte_mov16((uint8_t *)dst + 6 * 16, (const uint8_t *)src + 6 * 16); + rte_mov16((uint8_t *)dst + 7 * 16, (const uint8_t *)src + 7 * 16); + rte_mov16((uint8_t *)dst + 8 * 16, (const uint8_t *)src + 8 * 16); + rte_mov16((uint8_t *)dst + 9 * 16, (const uint8_t *)src + 9 * 16); + rte_mov16((uint8_t *)dst + 10 * 16, (const uint8_t *)src + 10 * 16); + rte_mov16((uint8_t *)dst + 11 * 16, (const uint8_t *)src + 11 * 16); + rte_mov16((uint8_t *)dst + 12 * 16, (const uint8_t *)src + 12 * 16); + rte_mov16((uint8_t *)dst + 13 * 16, (const uint8_t *)src + 13 * 16); + rte_mov16((uint8_t *)dst + 14 * 16, (const uint8_t *)src + 14 * 16); + rte_mov16((uint8_t *)dst + 15 * 16, (const uint8_t *)src + 15 * 16); +} + +/** + * Macro for copying unaligned block from one location to another with constant load offset, + * 47 bytes leftover maximum, + * locations should not overlap. + * Requirements: + * - Store is aligned + * - Load offset is , which must be immediate value within [1, 15] + * - For , make sure bit backwards & <16 - offset> bit forwards are available for loading + * - , , must be variables + * - __m128i ~ must be pre-defined + */ +#define MOVEUNALIGNED_LEFT47_IMM(dst, src, len, offset) \ +({ \ + int tmp; \ + while (len >= 128 + 16 - offset) { \ + xmm0 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 0 * 16)); \ + len -= 128; \ + xmm1 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 1 * 16)); \ + xmm2 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 2 * 16)); \ + xmm3 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 3 * 16)); \ + xmm4 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 4 * 16)); \ + xmm5 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 5 * 16)); \ + xmm6 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 6 * 16)); \ + xmm7 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 7 * 16)); \ + xmm8 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 8 * 16)); \ + src = (const uint8_t *)src + 128; \ + _mm_storeu_si128((__m128i *)((uint8_t *)dst + 0 * 16), _mm_alignr_epi8(xmm1, xmm0, offset)); \ + _mm_storeu_si128((__m128i *)((uint8_t *)dst + 1 * 16), _mm_alignr_epi8(xmm2, xmm1, offset)); \ + _mm_storeu_si128((__m128i *)((uint8_t *)dst + 2 * 16), _mm_alignr_epi8(xmm3, xmm2, offset)); \ + _mm_storeu_si128((__m128i *)((uint8_t *)dst + 3 * 16), _mm_alignr_epi8(xmm4, xmm3, offset)); \ + _mm_storeu_si128((__m128i *)((uint8_t *)dst + 4 * 16), _mm_alignr_epi8(xmm5, xmm4, offset)); \ + _mm_storeu_si128((__m128i *)((uint8_t *)dst + 5 * 16), _mm_alignr_epi8(xmm6, xmm5, offset)); \ + _mm_storeu_si128((__m128i *)((uint8_t *)dst + 6 * 16), _mm_alignr_epi8(xmm7, xmm6, offset)); \ + _mm_storeu_si128((__m128i *)((uint8_t *)dst + 7 * 16), _mm_alignr_epi8(xmm8, xmm7, offset)); \ + dst = (uint8_t *)dst + 128; \ + } \ + tmp = len; \ + len = ((len - 16 + offset) & 127) + 16 - offset; \ + tmp -= len; \ + src = (const uint8_t *)src + tmp; \ + dst = (uint8_t *)dst + tmp; \ + if (len >= 32 + 16 - offset) { \ + while (len >= 32 + 16 - offset) { \ + xmm0 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 0 * 16)); \ + len -= 32; \ + xmm1 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 1 * 16)); \ + xmm2 = _mm_loadu_si128((const __m128i *)((const uint8_t *)src - offset + 2 * 16)); \ + src = (const uint8_t *)src + 32; \ + _mm_storeu_si128((__m128i *)((uint8_t *)dst + 0 * 16), _mm_alignr_epi8(xmm1, xmm0, offset)); \ + _mm_storeu_si128((__m128i *)((uint8_t *)dst + 1 * 16), _mm_alignr_epi8(xmm2, xmm1, offset)); \ + dst = (uint8_t *)dst + 32; \ + } \ + tmp = len; \ + len = ((len - 16 + offset) & 31) + 16 - offset; \ + tmp -= len; \ + src = (const uint8_t *)src + tmp; \ + dst = (uint8_t *)dst + tmp; \ + } \ +}) + +/** + * Macro for copying unaligned block from one location to another, + * 47 bytes leftover maximum, + * locations should not overlap. + * Use switch here because the aligning instruction requires immediate value for shift count. + * Requirements: + * - Store is aligned + * - Load offset is , which must be within [1, 15] + * - For , make sure bit backwards & <16 - offset> bit forwards are available for loading + * - , , must be variables + * - __m128i ~ used in MOVEUNALIGNED_LEFT47_IMM must be pre-defined + */ +#define MOVEUNALIGNED_LEFT47(dst, src, len, offset) \ +({ \ + switch (offset) { \ + case 0x01: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x01); break; \ + case 0x02: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x02); break; \ + case 0x03: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x03); break; \ + case 0x04: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x04); break; \ + case 0x05: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x05); break; \ + case 0x06: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x06); break; \ + case 0x07: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x07); break; \ + case 0x08: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x08); break; \ + case 0x09: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x09); break; \ + case 0x0A: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x0A); break; \ + case 0x0B: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x0B); break; \ + case 0x0C: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x0C); break; \ + case 0x0D: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x0D); break; \ + case 0x0E: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x0E); break; \ + case 0x0F: MOVEUNALIGNED_LEFT47_IMM(dst, src, n, 0x0F); break; \ + default:; \ + } \ +}) + +static inline void * +rte_memcpy(void *dst, const void *src, size_t n) +{ + __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7, xmm8; + uintptr_t dstu = (uintptr_t)dst; + uintptr_t srcu = (uintptr_t)src; + void *ret = dst; + size_t dstofss; + size_t srcofs; + + /** + * Copy less than 16 bytes + */ + if (n < 16) { + if (n & 0x01) { + *(uint8_t *)dstu = *(const uint8_t *)srcu; + srcu = (uintptr_t)((const uint8_t *)srcu + 1); + dstu = (uintptr_t)((uint8_t *)dstu + 1); + } + if (n & 0x02) { + *(uint16_t *)dstu = *(const uint16_t *)srcu; + srcu = (uintptr_t)((const uint16_t *)srcu + 1); + dstu = (uintptr_t)((uint16_t *)dstu + 1); + } + if (n & 0x04) { + *(uint32_t *)dstu = *(const uint32_t *)srcu; + srcu = (uintptr_t)((const uint32_t *)srcu + 1); + dstu = (uintptr_t)((uint32_t *)dstu + 1); + } + if (n & 0x08) { + *(uint64_t *)dstu = *(const uint64_t *)srcu; + } + return ret; + } + + /** + * Fast way when copy size doesn't exceed 512 bytes + */ + if (n <= 32) { + rte_mov16((uint8_t *)dst, (const uint8_t *)src); + rte_mov16((uint8_t *)dst - 16 + n, (const uint8_t *)src - 16 + n); + return ret; + } + if (n <= 48) { + rte_mov32((uint8_t *)dst, (const uint8_t *)src); + rte_mov16((uint8_t *)dst - 16 + n, (const uint8_t *)src - 16 + n); + return ret; + } + if (n <= 64) { + rte_mov32((uint8_t *)dst, (const uint8_t *)src); + rte_mov16((uint8_t *)dst + 32, (const uint8_t *)src + 32); + rte_mov16((uint8_t *)dst - 16 + n, (const uint8_t *)src - 16 + n); + return ret; + } + if (n <= 128) { + goto COPY_BLOCK_128_BACK15; + } + if (n <= 512) { + if (n >= 256) { + n -= 256; + rte_mov128((uint8_t *)dst, (const uint8_t *)src); + rte_mov128((uint8_t *)dst + 128, (const uint8_t *)src + 128); + src = (const uint8_t *)src + 256; + dst = (uint8_t *)dst + 256; + } +COPY_BLOCK_255_BACK15: + if (n >= 128) { + n -= 128; + rte_mov128((uint8_t *)dst, (const uint8_t *)src); + src = (const uint8_t *)src + 128; + dst = (uint8_t *)dst + 128; + } +COPY_BLOCK_128_BACK15: + if (n >= 64) { + n -= 64; + rte_mov64((uint8_t *)dst, (const uint8_t *)src); + src = (const uint8_t *)src + 64; + dst = (uint8_t *)dst + 64; + } +COPY_BLOCK_64_BACK15: + if (n >= 32) { + n -= 32; + rte_mov32((uint8_t *)dst, (const uint8_t *)src); + src = (const uint8_t *)src + 32; + dst = (uint8_t *)dst + 32; + } + if (n > 16) { + rte_mov16((uint8_t *)dst, (const uint8_t *)src); + rte_mov16((uint8_t *)dst - 16 + n, (const uint8_t *)src - 16 + n); + return ret; + } + if (n > 0) { + rte_mov16((uint8_t *)dst - 16 + n, (const uint8_t *)src - 16 + n); + } + return ret; + } + + /** + * Make store aligned when copy size exceeds 512 bytes, + * and make sure the first 15 bytes are copied, because + * unaligned copy functions require up to 15 bytes + * backwards access. + */ + dstofss = (uintptr_t)dst & 0x0F; + if (dstofss > 0) { + dstofss = 16 - dstofss + 16; + n -= dstofss; + rte_mov32((uint8_t *)dst, (const uint8_t *)src); + src = (const uint8_t *)src + dstofss; + dst = (uint8_t *)dst + dstofss; + } + srcofs = ((uintptr_t)src & 0x0F); + + /** + * For aligned copy + */ + if (srcofs == 0) { + /** + * Copy 256-byte blocks + */ + for (; n >= 256; n -= 256) { + rte_mov256((uint8_t *)dst, (const uint8_t *)src); + dst = (uint8_t *)dst + 256; + src = (const uint8_t *)src + 256; + } + + /** + * Copy whatever left + */ + goto COPY_BLOCK_255_BACK15; + } + + /** + * For copy with unaligned load + */ + MOVEUNALIGNED_LEFT47(dst, src, n, srcofs); + + /** + * Copy whatever left + */ + goto COPY_BLOCK_64_BACK15; +} + +#endif /* RTE_MACHINE_CPUFLAG */ + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_MEMCPY_X86_64_H_ */ diff --git a/lib/librte_eal/common/include/arch/x86/rte_prefetch.h b/lib/librte_eal/common/include/arch/x86/rte_prefetch.h new file mode 100644 index 00000000..5dac47eb --- /dev/null +++ b/lib/librte_eal/common/include/arch/x86/rte_prefetch.h @@ -0,0 +1,67 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_PREFETCH_X86_64_H_ +#define _RTE_PREFETCH_X86_64_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_prefetch.h" + +static inline void rte_prefetch0(const volatile void *p) +{ + asm volatile ("prefetcht0 %[p]" : : [p] "m" (*(const volatile char *)p)); +} + +static inline void rte_prefetch1(const volatile void *p) +{ + asm volatile ("prefetcht1 %[p]" : : [p] "m" (*(const volatile char *)p)); +} + +static inline void rte_prefetch2(const volatile void *p) +{ + asm volatile ("prefetcht2 %[p]" : : [p] "m" (*(const volatile char *)p)); +} + +static inline void rte_prefetch_non_temporal(const volatile void *p) +{ + asm volatile ("prefetchnta %[p]" : : [p] "m" (*(const volatile char *)p)); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_PREFETCH_X86_64_H_ */ diff --git a/lib/librte_eal/common/include/arch/x86/rte_rtm.h b/lib/librte_eal/common/include/arch/x86/rte_rtm.h new file mode 100644 index 00000000..d9356419 --- /dev/null +++ b/lib/librte_eal/common/include/arch/x86/rte_rtm.h @@ -0,0 +1,73 @@ +#ifndef _RTE_RTM_H_ +#define _RTE_RTM_H_ 1 + +/* + * Copyright (c) 2012,2013 Intel Corporation + * Author: Andi Kleen + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that: (1) source code distributions + * retain the above copyright notice and this paragraph in its entirety, (2) + * distributions including binary code include the above copyright notice and + * this paragraph in its entirety in the documentation or other materials + * provided with the distribution + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. + */ + +/* Official RTM intrinsics interface matching gcc/icc, but works + on older gcc compatible compilers and binutils. */ + + +#ifdef __cplusplus +extern "C" { +#endif + + +#define RTE_XBEGIN_STARTED (~0u) +#define RTE_XABORT_EXPLICIT (1 << 0) +#define RTE_XABORT_RETRY (1 << 1) +#define RTE_XABORT_CONFLICT (1 << 2) +#define RTE_XABORT_CAPACITY (1 << 3) +#define RTE_XABORT_DEBUG (1 << 4) +#define RTE_XABORT_NESTED (1 << 5) +#define RTE_XABORT_CODE(x) (((x) >> 24) & 0xff) + +static __attribute__((__always_inline__)) inline +unsigned int rte_xbegin(void) +{ + unsigned int ret = RTE_XBEGIN_STARTED; + + asm volatile(".byte 0xc7,0xf8 ; .long 0" : "+a" (ret) :: "memory"); + return ret; +} + +static __attribute__((__always_inline__)) inline +void rte_xend(void) +{ + asm volatile(".byte 0x0f,0x01,0xd5" ::: "memory"); +} + +static __attribute__((__always_inline__)) inline +void rte_xabort(const unsigned int status) +{ + asm volatile(".byte 0xc6,0xf8,%P0" :: "i" (status) : "memory"); +} + +static __attribute__((__always_inline__)) inline +int rte_xtest(void) +{ + unsigned char out; + + asm volatile(".byte 0x0f,0x01,0xd6 ; setnz %0" : + "=r" (out) :: "memory"); + return out; +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_RTM_H_ */ diff --git a/lib/librte_eal/common/include/arch/x86/rte_rwlock.h b/lib/librte_eal/common/include/arch/x86/rte_rwlock.h new file mode 100644 index 00000000..afd1c3c2 --- /dev/null +++ b/lib/librte_eal/common/include/arch/x86/rte_rwlock.h @@ -0,0 +1,82 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_RWLOCK_X86_64_H_ +#define _RTE_RWLOCK_X86_64_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_rwlock.h" +#include "rte_spinlock.h" + +static inline void +rte_rwlock_read_lock_tm(rte_rwlock_t *rwl) +{ + if (likely(rte_try_tm(&rwl->cnt))) + return; + rte_rwlock_read_lock(rwl); +} + +static inline void +rte_rwlock_read_unlock_tm(rte_rwlock_t *rwl) +{ + if (unlikely(rwl->cnt)) + rte_rwlock_read_unlock(rwl); + else + rte_xend(); +} + +static inline void +rte_rwlock_write_lock_tm(rte_rwlock_t *rwl) +{ + if (likely(rte_try_tm(&rwl->cnt))) + return; + rte_rwlock_write_lock(rwl); +} + +static inline void +rte_rwlock_write_unlock_tm(rte_rwlock_t *rwl) +{ + if (unlikely(rwl->cnt)) + rte_rwlock_write_unlock(rwl); + else + rte_xend(); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_RWLOCK_X86_64_H_ */ diff --git a/lib/librte_eal/common/include/arch/x86/rte_spinlock.h b/lib/librte_eal/common/include/arch/x86/rte_spinlock.h new file mode 100644 index 00000000..02f95cbb --- /dev/null +++ b/lib/librte_eal/common/include/arch/x86/rte_spinlock.h @@ -0,0 +1,201 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_SPINLOCK_X86_64_H_ +#define _RTE_SPINLOCK_X86_64_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include "generic/rte_spinlock.h" +#include "rte_rtm.h" +#include "rte_cpuflags.h" +#include "rte_branch_prediction.h" +#include "rte_common.h" + +#define RTE_RTM_MAX_RETRIES (10) +#define RTE_XABORT_LOCK_BUSY (0xff) + +#ifndef RTE_FORCE_INTRINSICS +static inline void +rte_spinlock_lock(rte_spinlock_t *sl) +{ + int lock_val = 1; + asm volatile ( + "1:\n" + "xchg %[locked], %[lv]\n" + "test %[lv], %[lv]\n" + "jz 3f\n" + "2:\n" + "pause\n" + "cmpl $0, %[locked]\n" + "jnz 2b\n" + "jmp 1b\n" + "3:\n" + : [locked] "=m" (sl->locked), [lv] "=q" (lock_val) + : "[lv]" (lock_val) + : "memory"); +} + +static inline void +rte_spinlock_unlock (rte_spinlock_t *sl) +{ + int unlock_val = 0; + asm volatile ( + "xchg %[locked], %[ulv]\n" + : [locked] "=m" (sl->locked), [ulv] "=q" (unlock_val) + : "[ulv]" (unlock_val) + : "memory"); +} + +static inline int +rte_spinlock_trylock (rte_spinlock_t *sl) +{ + int lockval = 1; + + asm volatile ( + "xchg %[locked], %[lockval]" + : [locked] "=m" (sl->locked), [lockval] "=q" (lockval) + : "[lockval]" (lockval) + : "memory"); + + return lockval == 0; +} +#endif + +static uint8_t rtm_supported; /* cache the flag to avoid the overhead + of the rte_cpu_get_flag_enabled function */ + +static inline void __attribute__((constructor)) +rte_rtm_init(void) +{ + rtm_supported = rte_cpu_get_flag_enabled(RTE_CPUFLAG_RTM); +} + +static inline int rte_tm_supported(void) +{ + return rtm_supported; +} + +static inline int +rte_try_tm(volatile int *lock) +{ + if (!rtm_supported) + return 0; + + int retries = RTE_RTM_MAX_RETRIES; + + while (likely(retries--)) { + + unsigned int status = rte_xbegin(); + + if (likely(RTE_XBEGIN_STARTED == status)) { + if (unlikely(*lock)) + rte_xabort(RTE_XABORT_LOCK_BUSY); + else + return 1; + } + while (*lock) + rte_pause(); + + if ((status & RTE_XABORT_EXPLICIT) && + (RTE_XABORT_CODE(status) == RTE_XABORT_LOCK_BUSY)) + continue; + + if ((status & RTE_XABORT_RETRY) == 0) /* do not retry */ + break; + } + return 0; +} + +static inline void +rte_spinlock_lock_tm(rte_spinlock_t *sl) +{ + if (likely(rte_try_tm(&sl->locked))) + return; + + rte_spinlock_lock(sl); /* fall-back */ +} + +static inline int +rte_spinlock_trylock_tm(rte_spinlock_t *sl) +{ + if (likely(rte_try_tm(&sl->locked))) + return 1; + + return rte_spinlock_trylock(sl); +} + +static inline void +rte_spinlock_unlock_tm(rte_spinlock_t *sl) +{ + if (unlikely(sl->locked)) + rte_spinlock_unlock(sl); + else + rte_xend(); +} + +static inline void +rte_spinlock_recursive_lock_tm(rte_spinlock_recursive_t *slr) +{ + if (likely(rte_try_tm(&slr->sl.locked))) + return; + + rte_spinlock_recursive_lock(slr); /* fall-back */ +} + +static inline void +rte_spinlock_recursive_unlock_tm(rte_spinlock_recursive_t *slr) +{ + if (unlikely(slr->sl.locked)) + rte_spinlock_recursive_unlock(slr); + else + rte_xend(); +} + +static inline int +rte_spinlock_recursive_trylock_tm(rte_spinlock_recursive_t *slr) +{ + if (likely(rte_try_tm(&slr->sl.locked))) + return 1; + + return rte_spinlock_recursive_trylock(slr); +} + + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_SPINLOCK_X86_64_H_ */ diff --git a/lib/librte_eal/common/include/arch/x86/rte_vect.h b/lib/librte_eal/common/include/arch/x86/rte_vect.h new file mode 100644 index 00000000..b698797c --- /dev/null +++ b/lib/librte_eal/common/include/arch/x86/rte_vect.h @@ -0,0 +1,132 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_VECT_H_ +#define _RTE_VECT_H_ + +/** + * @file + * + * RTE SSE/AVX related header. + */ + +#if (defined(__ICC) || (__GNUC__ == 4 && __GNUC_MINOR__ < 4)) + +#ifdef __SSE__ +#include +#endif + +#ifdef __SSE2__ +#include +#endif + +#ifdef __SSE3__ +#include +#endif + +#if defined(__SSE4_2__) || defined(__SSE4_1__) +#include +#endif + +#if defined(__AVX__) +#include +#endif + +#else + +#include + +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +typedef __m128i xmm_t; + +#define XMM_SIZE (sizeof(xmm_t)) +#define XMM_MASK (XMM_SIZE - 1) + +typedef union rte_xmm { + xmm_t x; + uint8_t u8[XMM_SIZE / sizeof(uint8_t)]; + uint16_t u16[XMM_SIZE / sizeof(uint16_t)]; + uint32_t u32[XMM_SIZE / sizeof(uint32_t)]; + uint64_t u64[XMM_SIZE / sizeof(uint64_t)]; + double pd[XMM_SIZE / sizeof(double)]; +} rte_xmm_t; + +#ifdef __AVX__ + +typedef __m256i ymm_t; + +#define YMM_SIZE (sizeof(ymm_t)) +#define YMM_MASK (YMM_SIZE - 1) + +typedef union rte_ymm { + ymm_t y; + xmm_t x[YMM_SIZE / sizeof(xmm_t)]; + uint8_t u8[YMM_SIZE / sizeof(uint8_t)]; + uint16_t u16[YMM_SIZE / sizeof(uint16_t)]; + uint32_t u32[YMM_SIZE / sizeof(uint32_t)]; + uint64_t u64[YMM_SIZE / sizeof(uint64_t)]; + double pd[YMM_SIZE / sizeof(double)]; +} rte_ymm_t; + +#endif /* __AVX__ */ + +#ifdef RTE_ARCH_I686 +#define _mm_cvtsi128_si64(a) ({ \ + rte_xmm_t m; \ + m.x = (a); \ + (m.u64[0]); \ +}) +#endif + +/* + * Prior to version 12.1 icc doesn't support _mm_set_epi64x. + */ +#if (defined(__ICC) && __ICC < 1210) +#define _mm_set_epi64x(a, b) ({ \ + rte_xmm_t m; \ + m.u64[0] = b; \ + m.u64[1] = a; \ + (m.x); \ +}) +#endif /* (defined(__ICC) && __ICC < 1210) */ + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_VECT_H_ */ diff --git a/lib/librte_eal/common/include/generic/rte_atomic.h b/lib/librte_eal/common/include/generic/rte_atomic.h new file mode 100644 index 00000000..bfb4fe44 --- /dev/null +++ b/lib/librte_eal/common/include/generic/rte_atomic.h @@ -0,0 +1,945 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_ATOMIC_H_ +#define _RTE_ATOMIC_H_ + +/** + * @file + * Atomic Operations + * + * This file defines a generic API for atomic operations. + */ + +#include + +#ifdef __DOXYGEN__ + +/** + * General memory barrier. + * + * Guarantees that the LOAD and STORE operations generated before the + * barrier occur before the LOAD and STORE operations generated after. + * This function is architecture dependent. + */ +static inline void rte_mb(void); + +/** + * Write memory barrier. + * + * Guarantees that the STORE operations generated before the barrier + * occur before the STORE operations generated after. + * This function is architecture dependent. + */ +static inline void rte_wmb(void); + +/** + * Read memory barrier. + * + * Guarantees that the LOAD operations generated before the barrier + * occur before the LOAD operations generated after. + * This function is architecture dependent. + */ +static inline void rte_rmb(void); + +/** + * General memory barrier between lcores + * + * Guarantees that the LOAD and STORE operations that precede the + * rte_smp_mb() call are globally visible across the lcores + * before the the LOAD and STORE operations that follows it. + */ +static inline void rte_smp_mb(void); + +/** + * Write memory barrier between lcores + * + * Guarantees that the STORE operations that precede the + * rte_smp_wmb() call are globally visible across the lcores + * before the the STORE operations that follows it. + */ +static inline void rte_smp_wmb(void); + +/** + * Read memory barrier between lcores + * + * Guarantees that the LOAD operations that precede the + * rte_smp_rmb() call are globally visible across the lcores + * before the the LOAD operations that follows it. + */ +static inline void rte_smp_rmb(void); + +#endif /* __DOXYGEN__ */ + +/** + * Compiler barrier. + * + * Guarantees that operation reordering does not occur at compile time + * for operations directly before and after the barrier. + */ +#define rte_compiler_barrier() do { \ + asm volatile ("" : : : "memory"); \ +} while(0) + +/*------------------------- 16 bit atomic operations -------------------------*/ + +/** + * Atomic compare and set. + * + * (atomic) equivalent to: + * if (*dst == exp) + * *dst = src (all 16-bit words) + * + * @param dst + * The destination location into which the value will be written. + * @param exp + * The expected value. + * @param src + * The new value. + * @return + * Non-zero on success; 0 on failure. + */ +static inline int +rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src); + +#ifdef RTE_FORCE_INTRINSICS +static inline int +rte_atomic16_cmpset(volatile uint16_t *dst, uint16_t exp, uint16_t src) +{ + return __sync_bool_compare_and_swap(dst, exp, src); +} +#endif + +/** + * The atomic counter structure. + */ +typedef struct { + volatile int16_t cnt; /**< An internal counter value. */ +} rte_atomic16_t; + +/** + * Static initializer for an atomic counter. + */ +#define RTE_ATOMIC16_INIT(val) { (val) } + +/** + * Initialize an atomic counter. + * + * @param v + * A pointer to the atomic counter. + */ +static inline void +rte_atomic16_init(rte_atomic16_t *v) +{ + v->cnt = 0; +} + +/** + * Atomically read a 16-bit value from a counter. + * + * @param v + * A pointer to the atomic counter. + * @return + * The value of the counter. + */ +static inline int16_t +rte_atomic16_read(const rte_atomic16_t *v) +{ + return v->cnt; +} + +/** + * Atomically set a counter to a 16-bit value. + * + * @param v + * A pointer to the atomic counter. + * @param new_value + * The new value for the counter. + */ +static inline void +rte_atomic16_set(rte_atomic16_t *v, int16_t new_value) +{ + v->cnt = new_value; +} + +/** + * Atomically add a 16-bit value to an atomic counter. + * + * @param v + * A pointer to the atomic counter. + * @param inc + * The value to be added to the counter. + */ +static inline void +rte_atomic16_add(rte_atomic16_t *v, int16_t inc) +{ + __sync_fetch_and_add(&v->cnt, inc); +} + +/** + * Atomically subtract a 16-bit value from an atomic counter. + * + * @param v + * A pointer to the atomic counter. + * @param dec + * The value to be subtracted from the counter. + */ +static inline void +rte_atomic16_sub(rte_atomic16_t *v, int16_t dec) +{ + __sync_fetch_and_sub(&v->cnt, dec); +} + +/** + * Atomically increment a counter by one. + * + * @param v + * A pointer to the atomic counter. + */ +static inline void +rte_atomic16_inc(rte_atomic16_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline void +rte_atomic16_inc(rte_atomic16_t *v) +{ + rte_atomic16_add(v, 1); +} +#endif + +/** + * Atomically decrement a counter by one. + * + * @param v + * A pointer to the atomic counter. + */ +static inline void +rte_atomic16_dec(rte_atomic16_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline void +rte_atomic16_dec(rte_atomic16_t *v) +{ + rte_atomic16_sub(v, 1); +} +#endif + +/** + * Atomically add a 16-bit value to a counter and return the result. + * + * Atomically adds the 16-bits value (inc) to the atomic counter (v) and + * returns the value of v after addition. + * + * @param v + * A pointer to the atomic counter. + * @param inc + * The value to be added to the counter. + * @return + * The value of v after the addition. + */ +static inline int16_t +rte_atomic16_add_return(rte_atomic16_t *v, int16_t inc) +{ + return __sync_add_and_fetch(&v->cnt, inc); +} + +/** + * Atomically subtract a 16-bit value from a counter and return + * the result. + * + * Atomically subtracts the 16-bit value (inc) from the atomic counter + * (v) and returns the value of v after the subtraction. + * + * @param v + * A pointer to the atomic counter. + * @param dec + * The value to be subtracted from the counter. + * @return + * The value of v after the subtraction. + */ +static inline int16_t +rte_atomic16_sub_return(rte_atomic16_t *v, int16_t dec) +{ + return __sync_sub_and_fetch(&v->cnt, dec); +} + +/** + * Atomically increment a 16-bit counter by one and test. + * + * Atomically increments the atomic counter (v) by one and returns true if + * the result is 0, or false in all other cases. + * + * @param v + * A pointer to the atomic counter. + * @return + * True if the result after the increment operation is 0; false otherwise. + */ +static inline int rte_atomic16_inc_and_test(rte_atomic16_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline int rte_atomic16_inc_and_test(rte_atomic16_t *v) +{ + return __sync_add_and_fetch(&v->cnt, 1) == 0; +} +#endif + +/** + * Atomically decrement a 16-bit counter by one and test. + * + * Atomically decrements the atomic counter (v) by one and returns true if + * the result is 0, or false in all other cases. + * + * @param v + * A pointer to the atomic counter. + * @return + * True if the result after the decrement operation is 0; false otherwise. + */ +static inline int rte_atomic16_dec_and_test(rte_atomic16_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline int rte_atomic16_dec_and_test(rte_atomic16_t *v) +{ + return __sync_sub_and_fetch(&v->cnt, 1) == 0; +} +#endif + +/** + * Atomically test and set a 16-bit atomic counter. + * + * If the counter value is already set, return 0 (failed). Otherwise, set + * the counter value to 1 and return 1 (success). + * + * @param v + * A pointer to the atomic counter. + * @return + * 0 if failed; else 1, success. + */ +static inline int rte_atomic16_test_and_set(rte_atomic16_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline int rte_atomic16_test_and_set(rte_atomic16_t *v) +{ + return rte_atomic16_cmpset((volatile uint16_t *)&v->cnt, 0, 1); +} +#endif + +/** + * Atomically set a 16-bit counter to 0. + * + * @param v + * A pointer to the atomic counter. + */ +static inline void rte_atomic16_clear(rte_atomic16_t *v) +{ + v->cnt = 0; +} + +/*------------------------- 32 bit atomic operations -------------------------*/ + +/** + * Atomic compare and set. + * + * (atomic) equivalent to: + * if (*dst == exp) + * *dst = src (all 32-bit words) + * + * @param dst + * The destination location into which the value will be written. + * @param exp + * The expected value. + * @param src + * The new value. + * @return + * Non-zero on success; 0 on failure. + */ +static inline int +rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src); + +#ifdef RTE_FORCE_INTRINSICS +static inline int +rte_atomic32_cmpset(volatile uint32_t *dst, uint32_t exp, uint32_t src) +{ + return __sync_bool_compare_and_swap(dst, exp, src); +} +#endif + +/** + * The atomic counter structure. + */ +typedef struct { + volatile int32_t cnt; /**< An internal counter value. */ +} rte_atomic32_t; + +/** + * Static initializer for an atomic counter. + */ +#define RTE_ATOMIC32_INIT(val) { (val) } + +/** + * Initialize an atomic counter. + * + * @param v + * A pointer to the atomic counter. + */ +static inline void +rte_atomic32_init(rte_atomic32_t *v) +{ + v->cnt = 0; +} + +/** + * Atomically read a 32-bit value from a counter. + * + * @param v + * A pointer to the atomic counter. + * @return + * The value of the counter. + */ +static inline int32_t +rte_atomic32_read(const rte_atomic32_t *v) +{ + return v->cnt; +} + +/** + * Atomically set a counter to a 32-bit value. + * + * @param v + * A pointer to the atomic counter. + * @param new_value + * The new value for the counter. + */ +static inline void +rte_atomic32_set(rte_atomic32_t *v, int32_t new_value) +{ + v->cnt = new_value; +} + +/** + * Atomically add a 32-bit value to an atomic counter. + * + * @param v + * A pointer to the atomic counter. + * @param inc + * The value to be added to the counter. + */ +static inline void +rte_atomic32_add(rte_atomic32_t *v, int32_t inc) +{ + __sync_fetch_and_add(&v->cnt, inc); +} + +/** + * Atomically subtract a 32-bit value from an atomic counter. + * + * @param v + * A pointer to the atomic counter. + * @param dec + * The value to be subtracted from the counter. + */ +static inline void +rte_atomic32_sub(rte_atomic32_t *v, int32_t dec) +{ + __sync_fetch_and_sub(&v->cnt, dec); +} + +/** + * Atomically increment a counter by one. + * + * @param v + * A pointer to the atomic counter. + */ +static inline void +rte_atomic32_inc(rte_atomic32_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline void +rte_atomic32_inc(rte_atomic32_t *v) +{ + rte_atomic32_add(v, 1); +} +#endif + +/** + * Atomically decrement a counter by one. + * + * @param v + * A pointer to the atomic counter. + */ +static inline void +rte_atomic32_dec(rte_atomic32_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline void +rte_atomic32_dec(rte_atomic32_t *v) +{ + rte_atomic32_sub(v,1); +} +#endif + +/** + * Atomically add a 32-bit value to a counter and return the result. + * + * Atomically adds the 32-bits value (inc) to the atomic counter (v) and + * returns the value of v after addition. + * + * @param v + * A pointer to the atomic counter. + * @param inc + * The value to be added to the counter. + * @return + * The value of v after the addition. + */ +static inline int32_t +rte_atomic32_add_return(rte_atomic32_t *v, int32_t inc) +{ + return __sync_add_and_fetch(&v->cnt, inc); +} + +/** + * Atomically subtract a 32-bit value from a counter and return + * the result. + * + * Atomically subtracts the 32-bit value (inc) from the atomic counter + * (v) and returns the value of v after the subtraction. + * + * @param v + * A pointer to the atomic counter. + * @param dec + * The value to be subtracted from the counter. + * @return + * The value of v after the subtraction. + */ +static inline int32_t +rte_atomic32_sub_return(rte_atomic32_t *v, int32_t dec) +{ + return __sync_sub_and_fetch(&v->cnt, dec); +} + +/** + * Atomically increment a 32-bit counter by one and test. + * + * Atomically increments the atomic counter (v) by one and returns true if + * the result is 0, or false in all other cases. + * + * @param v + * A pointer to the atomic counter. + * @return + * True if the result after the increment operation is 0; false otherwise. + */ +static inline int rte_atomic32_inc_and_test(rte_atomic32_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline int rte_atomic32_inc_and_test(rte_atomic32_t *v) +{ + return __sync_add_and_fetch(&v->cnt, 1) == 0; +} +#endif + +/** + * Atomically decrement a 32-bit counter by one and test. + * + * Atomically decrements the atomic counter (v) by one and returns true if + * the result is 0, or false in all other cases. + * + * @param v + * A pointer to the atomic counter. + * @return + * True if the result after the decrement operation is 0; false otherwise. + */ +static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline int rte_atomic32_dec_and_test(rte_atomic32_t *v) +{ + return __sync_sub_and_fetch(&v->cnt, 1) == 0; +} +#endif + +/** + * Atomically test and set a 32-bit atomic counter. + * + * If the counter value is already set, return 0 (failed). Otherwise, set + * the counter value to 1 and return 1 (success). + * + * @param v + * A pointer to the atomic counter. + * @return + * 0 if failed; else 1, success. + */ +static inline int rte_atomic32_test_and_set(rte_atomic32_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline int rte_atomic32_test_and_set(rte_atomic32_t *v) +{ + return rte_atomic32_cmpset((volatile uint32_t *)&v->cnt, 0, 1); +} +#endif + +/** + * Atomically set a 32-bit counter to 0. + * + * @param v + * A pointer to the atomic counter. + */ +static inline void rte_atomic32_clear(rte_atomic32_t *v) +{ + v->cnt = 0; +} + +/*------------------------- 64 bit atomic operations -------------------------*/ + +/** + * An atomic compare and set function used by the mutex functions. + * (atomic) equivalent to: + * if (*dst == exp) + * *dst = src (all 64-bit words) + * + * @param dst + * The destination into which the value will be written. + * @param exp + * The expected value. + * @param src + * The new value. + * @return + * Non-zero on success; 0 on failure. + */ +static inline int +rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src); + +#ifdef RTE_FORCE_INTRINSICS +static inline int +rte_atomic64_cmpset(volatile uint64_t *dst, uint64_t exp, uint64_t src) +{ + return __sync_bool_compare_and_swap(dst, exp, src); +} +#endif + +/** + * The atomic counter structure. + */ +typedef struct { + volatile int64_t cnt; /**< Internal counter value. */ +} rte_atomic64_t; + +/** + * Static initializer for an atomic counter. + */ +#define RTE_ATOMIC64_INIT(val) { (val) } + +/** + * Initialize the atomic counter. + * + * @param v + * A pointer to the atomic counter. + */ +static inline void +rte_atomic64_init(rte_atomic64_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline void +rte_atomic64_init(rte_atomic64_t *v) +{ +#ifdef __LP64__ + v->cnt = 0; +#else + int success = 0; + uint64_t tmp; + + while (success == 0) { + tmp = v->cnt; + success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, + tmp, 0); + } +#endif +} +#endif + +/** + * Atomically read a 64-bit counter. + * + * @param v + * A pointer to the atomic counter. + * @return + * The value of the counter. + */ +static inline int64_t +rte_atomic64_read(rte_atomic64_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline int64_t +rte_atomic64_read(rte_atomic64_t *v) +{ +#ifdef __LP64__ + return v->cnt; +#else + int success = 0; + uint64_t tmp; + + while (success == 0) { + tmp = v->cnt; + /* replace the value by itself */ + success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, + tmp, tmp); + } + return tmp; +#endif +} +#endif + +/** + * Atomically set a 64-bit counter. + * + * @param v + * A pointer to the atomic counter. + * @param new_value + * The new value of the counter. + */ +static inline void +rte_atomic64_set(rte_atomic64_t *v, int64_t new_value); + +#ifdef RTE_FORCE_INTRINSICS +static inline void +rte_atomic64_set(rte_atomic64_t *v, int64_t new_value) +{ +#ifdef __LP64__ + v->cnt = new_value; +#else + int success = 0; + uint64_t tmp; + + while (success == 0) { + tmp = v->cnt; + success = rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, + tmp, new_value); + } +#endif +} +#endif + +/** + * Atomically add a 64-bit value to a counter. + * + * @param v + * A pointer to the atomic counter. + * @param inc + * The value to be added to the counter. + */ +static inline void +rte_atomic64_add(rte_atomic64_t *v, int64_t inc); + +#ifdef RTE_FORCE_INTRINSICS +static inline void +rte_atomic64_add(rte_atomic64_t *v, int64_t inc) +{ + __sync_fetch_and_add(&v->cnt, inc); +} +#endif + +/** + * Atomically subtract a 64-bit value from a counter. + * + * @param v + * A pointer to the atomic counter. + * @param dec + * The value to be subtracted from the counter. + */ +static inline void +rte_atomic64_sub(rte_atomic64_t *v, int64_t dec); + +#ifdef RTE_FORCE_INTRINSICS +static inline void +rte_atomic64_sub(rte_atomic64_t *v, int64_t dec) +{ + __sync_fetch_and_sub(&v->cnt, dec); +} +#endif + +/** + * Atomically increment a 64-bit counter by one and test. + * + * @param v + * A pointer to the atomic counter. + */ +static inline void +rte_atomic64_inc(rte_atomic64_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline void +rte_atomic64_inc(rte_atomic64_t *v) +{ + rte_atomic64_add(v, 1); +} +#endif + +/** + * Atomically decrement a 64-bit counter by one and test. + * + * @param v + * A pointer to the atomic counter. + */ +static inline void +rte_atomic64_dec(rte_atomic64_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline void +rte_atomic64_dec(rte_atomic64_t *v) +{ + rte_atomic64_sub(v, 1); +} +#endif + +/** + * Add a 64-bit value to an atomic counter and return the result. + * + * Atomically adds the 64-bit value (inc) to the atomic counter (v) and + * returns the value of v after the addition. + * + * @param v + * A pointer to the atomic counter. + * @param inc + * The value to be added to the counter. + * @return + * The value of v after the addition. + */ +static inline int64_t +rte_atomic64_add_return(rte_atomic64_t *v, int64_t inc); + +#ifdef RTE_FORCE_INTRINSICS +static inline int64_t +rte_atomic64_add_return(rte_atomic64_t *v, int64_t inc) +{ + return __sync_add_and_fetch(&v->cnt, inc); +} +#endif + +/** + * Subtract a 64-bit value from an atomic counter and return the result. + * + * Atomically subtracts the 64-bit value (dec) from the atomic counter (v) + * and returns the value of v after the subtraction. + * + * @param v + * A pointer to the atomic counter. + * @param dec + * The value to be subtracted from the counter. + * @return + * The value of v after the subtraction. + */ +static inline int64_t +rte_atomic64_sub_return(rte_atomic64_t *v, int64_t dec); + +#ifdef RTE_FORCE_INTRINSICS +static inline int64_t +rte_atomic64_sub_return(rte_atomic64_t *v, int64_t dec) +{ + return __sync_sub_and_fetch(&v->cnt, dec); +} +#endif + +/** + * Atomically increment a 64-bit counter by one and test. + * + * Atomically increments the atomic counter (v) by one and returns + * true if the result is 0, or false in all other cases. + * + * @param v + * A pointer to the atomic counter. + * @return + * True if the result after the addition is 0; false otherwise. + */ +static inline int rte_atomic64_inc_and_test(rte_atomic64_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline int rte_atomic64_inc_and_test(rte_atomic64_t *v) +{ + return rte_atomic64_add_return(v, 1) == 0; +} +#endif + +/** + * Atomically decrement a 64-bit counter by one and test. + * + * Atomically decrements the atomic counter (v) by one and returns true if + * the result is 0, or false in all other cases. + * + * @param v + * A pointer to the atomic counter. + * @return + * True if the result after subtraction is 0; false otherwise. + */ +static inline int rte_atomic64_dec_and_test(rte_atomic64_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline int rte_atomic64_dec_and_test(rte_atomic64_t *v) +{ + return rte_atomic64_sub_return(v, 1) == 0; +} +#endif + +/** + * Atomically test and set a 64-bit atomic counter. + * + * If the counter value is already set, return 0 (failed). Otherwise, set + * the counter value to 1 and return 1 (success). + * + * @param v + * A pointer to the atomic counter. + * @return + * 0 if failed; else 1, success. + */ +static inline int rte_atomic64_test_and_set(rte_atomic64_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline int rte_atomic64_test_and_set(rte_atomic64_t *v) +{ + return rte_atomic64_cmpset((volatile uint64_t *)&v->cnt, 0, 1); +} +#endif + +/** + * Atomically set a 64-bit counter to 0. + * + * @param v + * A pointer to the atomic counter. + */ +static inline void rte_atomic64_clear(rte_atomic64_t *v); + +#ifdef RTE_FORCE_INTRINSICS +static inline void rte_atomic64_clear(rte_atomic64_t *v) +{ + rte_atomic64_set(v, 0); +} +#endif + +#endif /* _RTE_ATOMIC_H_ */ diff --git a/lib/librte_eal/common/include/generic/rte_byteorder.h b/lib/librte_eal/common/include/generic/rte_byteorder.h new file mode 100644 index 00000000..c46fdcf2 --- /dev/null +++ b/lib/librte_eal/common/include/generic/rte_byteorder.h @@ -0,0 +1,217 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_BYTEORDER_H_ +#define _RTE_BYTEORDER_H_ + +/** + * @file + * + * Byte Swap Operations + * + * This file defines a generic API for byte swap operations. Part of + * the implementation is architecture-specific. + */ + +#include +#ifdef RTE_EXEC_ENV_BSDAPP +#include +#else +#include +#endif + +/* + * Compile-time endianness detection + */ +#define RTE_BIG_ENDIAN 1 +#define RTE_LITTLE_ENDIAN 2 +#if defined __BYTE_ORDER__ +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#define RTE_BYTE_ORDER RTE_BIG_ENDIAN +#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define RTE_BYTE_ORDER RTE_LITTLE_ENDIAN +#endif /* __BYTE_ORDER__ */ +#elif defined __BYTE_ORDER +#if __BYTE_ORDER == __BIG_ENDIAN +#define RTE_BYTE_ORDER RTE_BIG_ENDIAN +#elif __BYTE_ORDER == __LITTLE_ENDIAN +#define RTE_BYTE_ORDER RTE_LITTLE_ENDIAN +#endif /* __BYTE_ORDER */ +#elif defined __BIG_ENDIAN__ +#define RTE_BYTE_ORDER RTE_BIG_ENDIAN +#elif defined __LITTLE_ENDIAN__ +#define RTE_BYTE_ORDER RTE_LITTLE_ENDIAN +#endif + +/* + * An internal function to swap bytes in a 16-bit value. + * + * It is used by rte_bswap16() when the value is constant. Do not use + * this function directly; rte_bswap16() is preferred. + */ +static inline uint16_t +rte_constant_bswap16(uint16_t x) +{ + return (uint16_t)(((x & 0x00ffU) << 8) | + ((x & 0xff00U) >> 8)); +} + +/* + * An internal function to swap bytes in a 32-bit value. + * + * It is used by rte_bswap32() when the value is constant. Do not use + * this function directly; rte_bswap32() is preferred. + */ +static inline uint32_t +rte_constant_bswap32(uint32_t x) +{ + return ((x & 0x000000ffUL) << 24) | + ((x & 0x0000ff00UL) << 8) | + ((x & 0x00ff0000UL) >> 8) | + ((x & 0xff000000UL) >> 24); +} + +/* + * An internal function to swap bytes of a 64-bit value. + * + * It is used by rte_bswap64() when the value is constant. Do not use + * this function directly; rte_bswap64() is preferred. + */ +static inline uint64_t +rte_constant_bswap64(uint64_t x) +{ + return ((x & 0x00000000000000ffULL) << 56) | + ((x & 0x000000000000ff00ULL) << 40) | + ((x & 0x0000000000ff0000ULL) << 24) | + ((x & 0x00000000ff000000ULL) << 8) | + ((x & 0x000000ff00000000ULL) >> 8) | + ((x & 0x0000ff0000000000ULL) >> 24) | + ((x & 0x00ff000000000000ULL) >> 40) | + ((x & 0xff00000000000000ULL) >> 56); +} + + +#ifdef __DOXYGEN__ + +/** + * Swap bytes in a 16-bit value. + */ +static uint16_t rte_bswap16(uint16_t _x); + +/** + * Swap bytes in a 32-bit value. + */ +static uint32_t rte_bswap32(uint32_t x); + +/** + * Swap bytes in a 64-bit value. + */ +static uint64_t rte_bswap64(uint64_t x); + +/** + * Convert a 16-bit value from CPU order to little endian. + */ +static uint16_t rte_cpu_to_le_16(uint16_t x); + +/** + * Convert a 32-bit value from CPU order to little endian. + */ +static uint32_t rte_cpu_to_le_32(uint32_t x); + +/** + * Convert a 64-bit value from CPU order to little endian. + */ +static uint64_t rte_cpu_to_le_64(uint64_t x); + + +/** + * Convert a 16-bit value from CPU order to big endian. + */ +static uint16_t rte_cpu_to_be_16(uint16_t x); + +/** + * Convert a 32-bit value from CPU order to big endian. + */ +static uint32_t rte_cpu_to_be_32(uint32_t x); + +/** + * Convert a 64-bit value from CPU order to big endian. + */ +static uint64_t rte_cpu_to_be_64(uint64_t x); + + +/** + * Convert a 16-bit value from little endian to CPU order. + */ +static uint16_t rte_le_to_cpu_16(uint16_t x); + +/** + * Convert a 32-bit value from little endian to CPU order. + */ +static uint32_t rte_le_to_cpu_32(uint32_t x); + +/** + * Convert a 64-bit value from little endian to CPU order. + */ +static uint64_t rte_le_to_cpu_64(uint64_t x); + + +/** + * Convert a 16-bit value from big endian to CPU order. + */ +static uint16_t rte_be_to_cpu_16(uint16_t x); + +/** + * Convert a 32-bit value from big endian to CPU order. + */ +static uint32_t rte_be_to_cpu_32(uint32_t x); + +/** + * Convert a 64-bit value from big endian to CPU order. + */ +static uint64_t rte_be_to_cpu_64(uint64_t x); + +#endif /* __DOXYGEN__ */ + +#ifdef RTE_FORCE_INTRINSICS +#if __GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 8) +#define rte_bswap16(x) __builtin_bswap16(x) +#endif + +#define rte_bswap32(x) __builtin_bswap32(x) + +#define rte_bswap64(x) __builtin_bswap64(x) + +#endif + +#endif /* _RTE_BYTEORDER_H_ */ diff --git a/lib/librte_eal/common/include/generic/rte_cpuflags.h b/lib/librte_eal/common/include/generic/rte_cpuflags.h new file mode 100644 index 00000000..c1da357c --- /dev/null +++ b/lib/librte_eal/common/include/generic/rte_cpuflags.h @@ -0,0 +1,82 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_CPUFLAGS_H_ +#define _RTE_CPUFLAGS_H_ + +/** + * @file + * Architecture specific API to determine available CPU features at runtime. + */ + +#include + +/** + * Enumeration of all CPU features supported + */ +enum rte_cpu_flag_t; + +/** + * Get name of CPU flag + * + * @param feature + * CPU flag ID + * @return + * flag name + * NULL if flag ID is invalid + */ +const char * +rte_cpu_get_flag_name(enum rte_cpu_flag_t feature); + +/** + * Function for checking a CPU flag availability + * + * @param feature + * CPU flag to query CPU for + * @return + * 1 if flag is available + * 0 if flag is not available + * -ENOENT if flag is invalid + */ +int +rte_cpu_get_flag_enabled(enum rte_cpu_flag_t feature); + +/** + * This function checks that the currently used CPU supports the CPU features + * that were specified at compile time. It is called automatically within the + * EAL, so does not need to be used by applications. + */ +void +rte_cpu_check_supported(void); + +#endif /* _RTE_CPUFLAGS_H_ */ diff --git a/lib/librte_eal/common/include/generic/rte_cycles.h b/lib/librte_eal/common/include/generic/rte_cycles.h new file mode 100644 index 00000000..8cc21f20 --- /dev/null +++ b/lib/librte_eal/common/include/generic/rte_cycles.h @@ -0,0 +1,205 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/* BSD LICENSE + * + * Copyright(c) 2013 6WIND. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_CYCLES_H_ +#define _RTE_CYCLES_H_ + +/** + * @file + * + * Simple Time Reference Functions (Cycles and HPET). + */ + +#include +#include +#include + +#define MS_PER_S 1000 +#define US_PER_S 1000000 +#define NS_PER_S 1000000000 + +enum timer_source { + EAL_TIMER_TSC = 0, + EAL_TIMER_HPET +}; +extern enum timer_source eal_timer_source; + +/** + * Get the measured frequency of the RDTSC counter + * + * @return + * The TSC frequency for this lcore + */ +uint64_t +rte_get_tsc_hz(void); + +/** + * Return the number of TSC cycles since boot + * + * @return + * the number of cycles + */ +static inline uint64_t +rte_get_tsc_cycles(void); + +#ifdef RTE_LIBEAL_USE_HPET +/** + * Return the number of HPET cycles since boot + * + * This counter is global for all execution units. The number of + * cycles in one second can be retrieved using rte_get_hpet_hz(). + * + * @return + * the number of cycles + */ +uint64_t +rte_get_hpet_cycles(void); + +/** + * Get the number of HPET cycles in one second. + * + * @return + * The number of cycles in one second. + */ +uint64_t +rte_get_hpet_hz(void); + +/** + * Initialise the HPET for use. This must be called before the rte_get_hpet_hz + * and rte_get_hpet_cycles APIs are called. If this function does not succeed, + * then the HPET functions are unavailable and should not be called. + * + * @param make_default + * If set, the hpet timer becomes the default timer whose values are + * returned by the rte_get_timer_hz/cycles API calls + * + * @return + * 0 on success, + * -1 on error, and the make_default parameter is ignored. + */ +int rte_eal_hpet_init(int make_default); + +#endif + +/** + * Get the number of cycles since boot from the default timer. + * + * @return + * The number of cycles + */ +static inline uint64_t +rte_get_timer_cycles(void) +{ + switch(eal_timer_source) { + case EAL_TIMER_TSC: + return rte_get_tsc_cycles(); + case EAL_TIMER_HPET: +#ifdef RTE_LIBEAL_USE_HPET + return rte_get_hpet_cycles(); +#endif + default: rte_panic("Invalid timer source specified\n"); + } +} + +/** + * Get the number of cycles in one second for the default timer. + * + * @return + * The number of cycles in one second. + */ +static inline uint64_t +rte_get_timer_hz(void) +{ + switch(eal_timer_source) { + case EAL_TIMER_TSC: + return rte_get_tsc_hz(); + case EAL_TIMER_HPET: +#ifdef RTE_LIBEAL_USE_HPET + return rte_get_hpet_hz(); +#endif + default: rte_panic("Invalid timer source specified\n"); + } +} + +/** + * Wait at least us microseconds. + * + * @param us + * The number of microseconds to wait. + */ +void +rte_delay_us(unsigned us); + +/** + * Wait at least ms milliseconds. + * + * @param ms + * The number of milliseconds to wait. + */ +static inline void +rte_delay_ms(unsigned ms) +{ + rte_delay_us(ms * 1000); +} + +#endif /* _RTE_CYCLES_H_ */ diff --git a/lib/librte_eal/common/include/generic/rte_memcpy.h b/lib/librte_eal/common/include/generic/rte_memcpy.h new file mode 100644 index 00000000..03e84773 --- /dev/null +++ b/lib/librte_eal/common/include/generic/rte_memcpy.h @@ -0,0 +1,144 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_MEMCPY_H_ +#define _RTE_MEMCPY_H_ + +/** + * @file + * + * Functions for vectorised implementation of memcpy(). + */ + +/** + * Copy 16 bytes from one location to another using optimised + * instructions. The locations should not overlap. + * + * @param dst + * Pointer to the destination of the data. + * @param src + * Pointer to the source data. + */ +static inline void +rte_mov16(uint8_t *dst, const uint8_t *src); + +/** + * Copy 32 bytes from one location to another using optimised + * instructions. The locations should not overlap. + * + * @param dst + * Pointer to the destination of the data. + * @param src + * Pointer to the source data. + */ +static inline void +rte_mov32(uint8_t *dst, const uint8_t *src); + +/** + * Copy 48 bytes from one location to another using optimised + * instructions. The locations should not overlap. + * + * @param dst + * Pointer to the destination of the data. + * @param src + * Pointer to the source data. + */ +static inline void +rte_mov48(uint8_t *dst, const uint8_t *src); + +/** + * Copy 64 bytes from one location to another using optimised + * instructions. The locations should not overlap. + * + * @param dst + * Pointer to the destination of the data. + * @param src + * Pointer to the source data. + */ +static inline void +rte_mov64(uint8_t *dst, const uint8_t *src); + +/** + * Copy 128 bytes from one location to another using optimised + * instructions. The locations should not overlap. + * + * @param dst + * Pointer to the destination of the data. + * @param src + * Pointer to the source data. + */ +static inline void +rte_mov128(uint8_t *dst, const uint8_t *src); + +/** + * Copy 256 bytes from one location to another using optimised + * instructions. The locations should not overlap. + * + * @param dst + * Pointer to the destination of the data. + * @param src + * Pointer to the source data. + */ +static inline void +rte_mov256(uint8_t *dst, const uint8_t *src); + +#ifdef __DOXYGEN__ + +/** + * Copy bytes from one location to another. The locations must not overlap. + * + * @note This is implemented as a macro, so it's address should not be taken + * and care is needed as parameter expressions may be evaluated multiple times. + * + * @param dst + * Pointer to the destination of the data. + * @param src + * Pointer to the source data. + * @param n + * Number of bytes to copy. + * @return + * Pointer to the destination data. + */ +static void * +rte_memcpy(void *dst, const void *src, size_t n); + +#endif /* __DOXYGEN__ */ + +/* + * memcpy() function used by rte_memcpy macro + */ +static inline void * +rte_memcpy_func(void *dst, const void *src, size_t n) __attribute__((always_inline)); + + +#endif /* _RTE_MEMCPY_H_ */ diff --git a/lib/librte_eal/common/include/generic/rte_prefetch.h b/lib/librte_eal/common/include/generic/rte_prefetch.h new file mode 100644 index 00000000..07e409ec --- /dev/null +++ b/lib/librte_eal/common/include/generic/rte_prefetch.h @@ -0,0 +1,83 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_PREFETCH_H_ +#define _RTE_PREFETCH_H_ + +/** + * @file + * + * Prefetch operations. + * + * This file defines an API for prefetch macros / inline-functions, + * which are architecture-dependent. Prefetching occurs when a + * processor requests an instruction or data from memory to cache + * before it is actually needed, potentially speeding up the execution of the + * program. + */ + +/** + * Prefetch a cache line into all cache levels. + * @param p + * Address to prefetch + */ +static inline void rte_prefetch0(const volatile void *p); + +/** + * Prefetch a cache line into all cache levels except the 0th cache level. + * @param p + * Address to prefetch + */ +static inline void rte_prefetch1(const volatile void *p); + +/** + * Prefetch a cache line into all cache levels except the 0th and 1th cache + * levels. + * @param p + * Address to prefetch + */ +static inline void rte_prefetch2(const volatile void *p); + +/** + * Prefetch a cache line into all cache levels (non-temporal/transient version) + * + * The non-temporal prefetch is intended as a prefetch hint that processor will + * use the prefetched data only once or short period, unlike the + * rte_prefetch0() function which imply that prefetched data to use repeatedly. + * + * @param p + * Address to prefetch + */ +static inline void rte_prefetch_non_temporal(const volatile void *p); + +#endif /* _RTE_PREFETCH_H_ */ diff --git a/lib/librte_eal/common/include/generic/rte_rwlock.h b/lib/librte_eal/common/include/generic/rte_rwlock.h new file mode 100644 index 00000000..7a0fdc55 --- /dev/null +++ b/lib/librte_eal/common/include/generic/rte_rwlock.h @@ -0,0 +1,208 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_RWLOCK_H_ +#define _RTE_RWLOCK_H_ + +/** + * @file + * + * RTE Read-Write Locks + * + * This file defines an API for read-write locks. The lock is used to + * protect data that allows multiple readers in parallel, but only + * one writer. All readers are blocked until the writer is finished + * writing. + * + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +/** + * The rte_rwlock_t type. + * + * cnt is -1 when write lock is held, and > 0 when read locks are held. + */ +typedef struct { + volatile int32_t cnt; /**< -1 when W lock held, > 0 when R locks held. */ +} rte_rwlock_t; + +/** + * A static rwlock initializer. + */ +#define RTE_RWLOCK_INITIALIZER { 0 } + +/** + * Initialize the rwlock to an unlocked state. + * + * @param rwl + * A pointer to the rwlock structure. + */ +static inline void +rte_rwlock_init(rte_rwlock_t *rwl) +{ + rwl->cnt = 0; +} + +/** + * Take a read lock. Loop until the lock is held. + * + * @param rwl + * A pointer to a rwlock structure. + */ +static inline void +rte_rwlock_read_lock(rte_rwlock_t *rwl) +{ + int32_t x; + int success = 0; + + while (success == 0) { + x = rwl->cnt; + /* write lock is held */ + if (x < 0) { + rte_pause(); + continue; + } + success = rte_atomic32_cmpset((volatile uint32_t *)&rwl->cnt, + x, x + 1); + } +} + +/** + * Release a read lock. + * + * @param rwl + * A pointer to the rwlock structure. + */ +static inline void +rte_rwlock_read_unlock(rte_rwlock_t *rwl) +{ + rte_atomic32_dec((rte_atomic32_t *)(intptr_t)&rwl->cnt); +} + +/** + * Take a write lock. Loop until the lock is held. + * + * @param rwl + * A pointer to a rwlock structure. + */ +static inline void +rte_rwlock_write_lock(rte_rwlock_t *rwl) +{ + int32_t x; + int success = 0; + + while (success == 0) { + x = rwl->cnt; + /* a lock is held */ + if (x != 0) { + rte_pause(); + continue; + } + success = rte_atomic32_cmpset((volatile uint32_t *)&rwl->cnt, + 0, -1); + } +} + +/** + * Release a write lock. + * + * @param rwl + * A pointer to a rwlock structure. + */ +static inline void +rte_rwlock_write_unlock(rte_rwlock_t *rwl) +{ + rte_atomic32_inc((rte_atomic32_t *)(intptr_t)&rwl->cnt); +} + +/** + * Try to execute critical section in a hardware memory transaction, if it + * fails or not available take a read lock + * + * NOTE: An attempt to perform a HW I/O operation inside a hardware memory + * transaction always aborts the transaction since the CPU is not able to + * roll-back should the transaction fail. Therefore, hardware transactional + * locks are not advised to be used around rte_eth_rx_burst() and + * rte_eth_tx_burst() calls. + * + * @param rwl + * A pointer to a rwlock structure. + */ +static inline void +rte_rwlock_read_lock_tm(rte_rwlock_t *rwl); + +/** + * Commit hardware memory transaction or release the read lock if the lock is used as a fall-back + * + * @param rwl + * A pointer to the rwlock structure. + */ +static inline void +rte_rwlock_read_unlock_tm(rte_rwlock_t *rwl); + +/** + * Try to execute critical section in a hardware memory transaction, if it + * fails or not available take a write lock + * + * NOTE: An attempt to perform a HW I/O operation inside a hardware memory + * transaction always aborts the transaction since the CPU is not able to + * roll-back should the transaction fail. Therefore, hardware transactional + * locks are not advised to be used around rte_eth_rx_burst() and + * rte_eth_tx_burst() calls. + * + * @param rwl + * A pointer to a rwlock structure. + */ +static inline void +rte_rwlock_write_lock_tm(rte_rwlock_t *rwl); + +/** + * Commit hardware memory transaction or release the write lock if the lock is used as a fall-back + * + * @param rwl + * A pointer to a rwlock structure. + */ +static inline void +rte_rwlock_write_unlock_tm(rte_rwlock_t *rwl); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_RWLOCK_H_ */ diff --git a/lib/librte_eal/common/include/generic/rte_spinlock.h b/lib/librte_eal/common/include/generic/rte_spinlock.h new file mode 100644 index 00000000..e51fc56b --- /dev/null +++ b/lib/librte_eal/common/include/generic/rte_spinlock.h @@ -0,0 +1,325 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_SPINLOCK_H_ +#define _RTE_SPINLOCK_H_ + +/** + * @file + * + * RTE Spinlocks + * + * This file defines an API for read-write locks, which are implemented + * in an architecture-specific way. This kind of lock simply waits in + * a loop repeatedly checking until the lock becomes available. + * + * All locks must be initialised before use, and only initialised once. + * + */ + +#include +#ifdef RTE_FORCE_INTRINSICS +#include +#endif + +/** + * The rte_spinlock_t type. + */ +typedef struct { + volatile int locked; /**< lock status 0 = unlocked, 1 = locked */ +} rte_spinlock_t; + +/** + * A static spinlock initializer. + */ +#define RTE_SPINLOCK_INITIALIZER { 0 } + +/** + * Initialize the spinlock to an unlocked state. + * + * @param sl + * A pointer to the spinlock. + */ +static inline void +rte_spinlock_init(rte_spinlock_t *sl) +{ + sl->locked = 0; +} + +/** + * Take the spinlock. + * + * @param sl + * A pointer to the spinlock. + */ +static inline void +rte_spinlock_lock(rte_spinlock_t *sl); + +#ifdef RTE_FORCE_INTRINSICS +static inline void +rte_spinlock_lock(rte_spinlock_t *sl) +{ + while (__sync_lock_test_and_set(&sl->locked, 1)) + while(sl->locked) + rte_pause(); +} +#endif + +/** + * Release the spinlock. + * + * @param sl + * A pointer to the spinlock. + */ +static inline void +rte_spinlock_unlock (rte_spinlock_t *sl); + +#ifdef RTE_FORCE_INTRINSICS +static inline void +rte_spinlock_unlock (rte_spinlock_t *sl) +{ + __sync_lock_release(&sl->locked); +} +#endif + +/** + * Try to take the lock. + * + * @param sl + * A pointer to the spinlock. + * @return + * 1 if the lock is successfully taken; 0 otherwise. + */ +static inline int +rte_spinlock_trylock (rte_spinlock_t *sl); + +#ifdef RTE_FORCE_INTRINSICS +static inline int +rte_spinlock_trylock (rte_spinlock_t *sl) +{ + return __sync_lock_test_and_set(&sl->locked,1) == 0; +} +#endif + +/** + * Test if the lock is taken. + * + * @param sl + * A pointer to the spinlock. + * @return + * 1 if the lock is currently taken; 0 otherwise. + */ +static inline int rte_spinlock_is_locked (rte_spinlock_t *sl) +{ + return sl->locked; +} + +/** + * Test if hardware transactional memory (lock elision) is supported + * + * @return + * 1 if the hardware transactional memory is supported; 0 otherwise. + */ +static inline int rte_tm_supported(void); + +/** + * Try to execute critical section in a hardware memory transaction, + * if it fails or not available take the spinlock. + * + * NOTE: An attempt to perform a HW I/O operation inside a hardware memory + * transaction always aborts the transaction since the CPU is not able to + * roll-back should the transaction fail. Therefore, hardware transactional + * locks are not advised to be used around rte_eth_rx_burst() and + * rte_eth_tx_burst() calls. + * + * @param sl + * A pointer to the spinlock. + */ +static inline void +rte_spinlock_lock_tm(rte_spinlock_t *sl); + +/** + * Commit hardware memory transaction or release the spinlock if + * the spinlock is used as a fall-back + * + * @param sl + * A pointer to the spinlock. + */ +static inline void +rte_spinlock_unlock_tm(rte_spinlock_t *sl); + +/** + * Try to execute critical section in a hardware memory transaction, + * if it fails or not available try to take the lock. + * + * NOTE: An attempt to perform a HW I/O operation inside a hardware memory + * transaction always aborts the transaction since the CPU is not able to + * roll-back should the transaction fail. Therefore, hardware transactional + * locks are not advised to be used around rte_eth_rx_burst() and + * rte_eth_tx_burst() calls. + * + * @param sl + * A pointer to the spinlock. + * @return + * 1 if the hardware memory transaction is successfully started + * or lock is successfully taken; 0 otherwise. + */ +static inline int +rte_spinlock_trylock_tm(rte_spinlock_t *sl); + +/** + * The rte_spinlock_recursive_t type. + */ +typedef struct { + rte_spinlock_t sl; /**< the actual spinlock */ + volatile int user; /**< core id using lock, -1 for unused */ + volatile int count; /**< count of time this lock has been called */ +} rte_spinlock_recursive_t; + +/** + * A static recursive spinlock initializer. + */ +#define RTE_SPINLOCK_RECURSIVE_INITIALIZER {RTE_SPINLOCK_INITIALIZER, -1, 0} + +/** + * Initialize the recursive spinlock to an unlocked state. + * + * @param slr + * A pointer to the recursive spinlock. + */ +static inline void rte_spinlock_recursive_init(rte_spinlock_recursive_t *slr) +{ + rte_spinlock_init(&slr->sl); + slr->user = -1; + slr->count = 0; +} + +/** + * Take the recursive spinlock. + * + * @param slr + * A pointer to the recursive spinlock. + */ +static inline void rte_spinlock_recursive_lock(rte_spinlock_recursive_t *slr) +{ + int id = rte_gettid(); + + if (slr->user != id) { + rte_spinlock_lock(&slr->sl); + slr->user = id; + } + slr->count++; +} +/** + * Release the recursive spinlock. + * + * @param slr + * A pointer to the recursive spinlock. + */ +static inline void rte_spinlock_recursive_unlock(rte_spinlock_recursive_t *slr) +{ + if (--(slr->count) == 0) { + slr->user = -1; + rte_spinlock_unlock(&slr->sl); + } + +} + +/** + * Try to take the recursive lock. + * + * @param slr + * A pointer to the recursive spinlock. + * @return + * 1 if the lock is successfully taken; 0 otherwise. + */ +static inline int rte_spinlock_recursive_trylock(rte_spinlock_recursive_t *slr) +{ + int id = rte_gettid(); + + if (slr->user != id) { + if (rte_spinlock_trylock(&slr->sl) == 0) + return 0; + slr->user = id; + } + slr->count++; + return 1; +} + + +/** + * Try to execute critical section in a hardware memory transaction, + * if it fails or not available take the recursive spinlocks + * + * NOTE: An attempt to perform a HW I/O operation inside a hardware memory + * transaction always aborts the transaction since the CPU is not able to + * roll-back should the transaction fail. Therefore, hardware transactional + * locks are not advised to be used around rte_eth_rx_burst() and + * rte_eth_tx_burst() calls. + * + * @param slr + * A pointer to the recursive spinlock. + */ +static inline void rte_spinlock_recursive_lock_tm( + rte_spinlock_recursive_t *slr); + +/** + * Commit hardware memory transaction or release the recursive spinlock + * if the recursive spinlock is used as a fall-back + * + * @param slr + * A pointer to the recursive spinlock. + */ +static inline void rte_spinlock_recursive_unlock_tm( + rte_spinlock_recursive_t *slr); + +/** + * Try to execute critical section in a hardware memory transaction, + * if it fails or not available try to take the recursive lock + * + * NOTE: An attempt to perform a HW I/O operation inside a hardware memory + * transaction always aborts the transaction since the CPU is not able to + * roll-back should the transaction fail. Therefore, hardware transactional + * locks are not advised to be used around rte_eth_rx_burst() and + * rte_eth_tx_burst() calls. + * + * @param slr + * A pointer to the recursive spinlock. + * @return + * 1 if the hardware memory transaction is successfully started + * or lock is successfully taken; 0 otherwise. + */ +static inline int rte_spinlock_recursive_trylock_tm( + rte_spinlock_recursive_t *slr); + +#endif /* _RTE_SPINLOCK_H_ */ diff --git a/lib/librte_eal/common/include/rte_alarm.h b/lib/librte_eal/common/include/rte_alarm.h new file mode 100644 index 00000000..4012cd67 --- /dev/null +++ b/lib/librte_eal/common/include/rte_alarm.h @@ -0,0 +1,106 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_ALARM_H_ +#define _RTE_ALARM_H_ + +/** + * @file + * + * Alarm functions + * + * Simple alarm-clock functionality supplied by eal. + * Does not require hpet support. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +/** + * Signature of callback back function called when an alarm goes off. + */ +typedef void (*rte_eal_alarm_callback)(void *arg); + +/** + * Function to set a callback to be triggered when us microseconds + * have expired. Accuracy of timing to the microsecond is not guaranteed. The + * alarm function will not be called *before* the requested time, but may + * be called a short period of time afterwards. + * The alarm handler will be called only once. There is no need to call + * "rte_eal_alarm_cancel" from within the callback function. + * + * @param us + * The time in microseconds before the callback is called + * @param cb + * The function to be called when the alarm expires + * @param cb_arg + * Pointer parameter to be passed to the callback function + * + * @return + * On success, zero. + * On failure, a negative error number + */ +int rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb, void *cb_arg); + +/** + * Function to cancel an alarm callback which has been registered before. If + * used outside alarm callback it wait for all callbacks to finish execution. + * + * @param cb_fn + * alarm callback + * @param cb_arg + * Pointer parameter to be passed to the callback function. To remove all + * copies of a given callback function, irrespective of parameter, (void *)-1 + * can be used here. + * + * @return + * - value greater than 0 and rte_errno not changed - returned value is + * the number of canceled alarm callback functions + * - value greater or equal 0 and rte_errno set to EINPROGRESS, at least one + * alarm could not be canceled because cancellation was requested from alarm + * callback context. Returned value is the number of succesfuly canceled + * alarm callbacks + * - 0 and rte_errno set to ENOENT - no alarm found + * - -1 and rte_errno set to EINVAL - invalid parameter (NULL callback) + */ +int rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn, void *cb_arg); + +#ifdef __cplusplus +} +#endif + + +#endif /* _RTE_ALARM_H_ */ diff --git a/lib/librte_eal/common/include/rte_branch_prediction.h b/lib/librte_eal/common/include/rte_branch_prediction.h new file mode 100644 index 00000000..a6a56d17 --- /dev/null +++ b/lib/librte_eal/common/include/rte_branch_prediction.h @@ -0,0 +1,70 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * Branch Prediction Helpers in RTE + */ + +#ifndef _RTE_BRANCH_PREDICTION_H_ +#define _RTE_BRANCH_PREDICTION_H_ + +/** + * Check if a branch is likely to be taken. + * + * This compiler builtin allows the developer to indicate if a branch is + * likely to be taken. Example: + * + * if (likely(x > 1)) + * do_stuff(); + * + */ +#ifndef likely +#define likely(x) __builtin_expect((x),1) +#endif /* likely */ + +/** + * Check if a branch is unlikely to be taken. + * + * This compiler builtin allows the developer to indicate if a branch is + * unlikely to be taken. Example: + * + * if (unlikely(x < 1)) + * do_stuff(); + * + */ +#ifndef unlikely +#define unlikely(x) __builtin_expect((x),0) +#endif /* unlikely */ + +#endif /* _RTE_BRANCH_PREDICTION_H_ */ diff --git a/lib/librte_eal/common/include/rte_common.h b/lib/librte_eal/common/include/rte_common.h new file mode 100644 index 00000000..332f2a43 --- /dev/null +++ b/lib/librte_eal/common/include/rte_common.h @@ -0,0 +1,401 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_COMMON_H_ +#define _RTE_COMMON_H_ + +/** + * @file + * + * Generic, commonly-used macro and inline function definitions + * for DPDK. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include +#include +#include + +#ifndef typeof +#define typeof __typeof__ +#endif + +#ifndef asm +#define asm __asm__ +#endif + +#ifdef RTE_ARCH_STRICT_ALIGN +typedef uint64_t unaligned_uint64_t __attribute__ ((aligned(1))); +typedef uint32_t unaligned_uint32_t __attribute__ ((aligned(1))); +typedef uint16_t unaligned_uint16_t __attribute__ ((aligned(1))); +#else +typedef uint64_t unaligned_uint64_t; +typedef uint32_t unaligned_uint32_t; +typedef uint16_t unaligned_uint16_t; +#endif + +/** + * Force alignment + */ +#define __rte_aligned(a) __attribute__((__aligned__(a))) + +/** + * Force a structure to be packed + */ +#define __rte_packed __attribute__((__packed__)) + +/******* Macro to mark functions and fields scheduled for removal *****/ +#define __rte_deprecated __attribute__((__deprecated__)) + +/*********** Macros to eliminate unused variable warnings ********/ + +/** + * short definition to mark a function parameter unused + */ +#define __rte_unused __attribute__((__unused__)) + +/** + * definition to mark a variable or function parameter as used so + * as to avoid a compiler warning + */ +#define RTE_SET_USED(x) (void)(x) + +/*********** Macros for pointer arithmetic ********/ + +/** + * add a byte-value offset from a pointer + */ +#define RTE_PTR_ADD(ptr, x) ((void*)((uintptr_t)(ptr) + (x))) + +/** + * subtract a byte-value offset from a pointer + */ +#define RTE_PTR_SUB(ptr, x) ((void*)((uintptr_t)ptr - (x))) + +/** + * get the difference between two pointer values, i.e. how far apart + * in bytes are the locations they point two. It is assumed that + * ptr1 is greater than ptr2. + */ +#define RTE_PTR_DIFF(ptr1, ptr2) ((uintptr_t)(ptr1) - (uintptr_t)(ptr2)) + +/*********** Macros/static functions for doing alignment ********/ + + +/** + * Macro to align a pointer to a given power-of-two. The resultant + * pointer will be a pointer of the same type as the first parameter, and + * point to an address no higher than the first parameter. Second parameter + * must be a power-of-two value. + */ +#define RTE_PTR_ALIGN_FLOOR(ptr, align) \ + ((typeof(ptr))RTE_ALIGN_FLOOR((uintptr_t)ptr, align)) + +/** + * Macro to align a value to a given power-of-two. The resultant value + * will be of the same type as the first parameter, and will be no + * bigger than the first parameter. Second parameter must be a + * power-of-two value. + */ +#define RTE_ALIGN_FLOOR(val, align) \ + (typeof(val))((val) & (~((typeof(val))((align) - 1)))) + +/** + * Macro to align a pointer to a given power-of-two. The resultant + * pointer will be a pointer of the same type as the first parameter, and + * point to an address no lower than the first parameter. Second parameter + * must be a power-of-two value. + */ +#define RTE_PTR_ALIGN_CEIL(ptr, align) \ + RTE_PTR_ALIGN_FLOOR((typeof(ptr))RTE_PTR_ADD(ptr, (align) - 1), align) + +/** + * Macro to align a value to a given power-of-two. The resultant value + * will be of the same type as the first parameter, and will be no lower + * than the first parameter. Second parameter must be a power-of-two + * value. + */ +#define RTE_ALIGN_CEIL(val, align) \ + RTE_ALIGN_FLOOR(((val) + ((typeof(val)) (align) - 1)), align) + +/** + * Macro to align a pointer to a given power-of-two. The resultant + * pointer will be a pointer of the same type as the first parameter, and + * point to an address no lower than the first parameter. Second parameter + * must be a power-of-two value. + * This function is the same as RTE_PTR_ALIGN_CEIL + */ +#define RTE_PTR_ALIGN(ptr, align) RTE_PTR_ALIGN_CEIL(ptr, align) + +/** + * Macro to align a value to a given power-of-two. The resultant + * value will be of the same type as the first parameter, and + * will be no lower than the first parameter. Second parameter + * must be a power-of-two value. + * This function is the same as RTE_ALIGN_CEIL + */ +#define RTE_ALIGN(val, align) RTE_ALIGN_CEIL(val, align) + +/** + * Checks if a pointer is aligned to a given power-of-two value + * + * @param ptr + * The pointer whose alignment is to be checked + * @param align + * The power-of-two value to which the ptr should be aligned + * + * @return + * True(1) where the pointer is correctly aligned, false(0) otherwise + */ +static inline int +rte_is_aligned(void *ptr, unsigned align) +{ + return RTE_PTR_ALIGN(ptr, align) == ptr; +} + +/*********** Macros for compile type checks ********/ + +/** + * Triggers an error at compilation time if the condition is true. + */ +#ifndef __OPTIMIZE__ +#define RTE_BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)])) +#else +extern int RTE_BUILD_BUG_ON_detected_error; +#define RTE_BUILD_BUG_ON(condition) do { \ + ((void)sizeof(char[1 - 2*!!(condition)])); \ + if (condition) \ + RTE_BUILD_BUG_ON_detected_error = 1; \ +} while(0) +#endif + +/*********** Macros to work with powers of 2 ********/ + +/** + * Returns true if n is a power of 2 + * @param n + * Number to check + * @return 1 if true, 0 otherwise + */ +static inline int +rte_is_power_of_2(uint32_t n) +{ + return n && !(n & (n - 1)); +} + +/** + * Aligns input parameter to the next power of 2 + * + * @param x + * The integer value to algin + * + * @return + * Input parameter aligned to the next power of 2 + */ +static inline uint32_t +rte_align32pow2(uint32_t x) +{ + x--; + x |= x >> 1; + x |= x >> 2; + x |= x >> 4; + x |= x >> 8; + x |= x >> 16; + + return x + 1; +} + +/** + * Aligns 64b input parameter to the next power of 2 + * + * @param v + * The 64b value to align + * + * @return + * Input parameter aligned to the next power of 2 + */ +static inline uint64_t +rte_align64pow2(uint64_t v) +{ + v--; + v |= v >> 1; + v |= v >> 2; + v |= v >> 4; + v |= v >> 8; + v |= v >> 16; + v |= v >> 32; + + return v + 1; +} + +/*********** Macros for calculating min and max **********/ + +/** + * Macro to return the minimum of two numbers + */ +#define RTE_MIN(a, b) ({ \ + typeof (a) _a = (a); \ + typeof (b) _b = (b); \ + _a < _b ? _a : _b; \ + }) + +/** + * Macro to return the maximum of two numbers + */ +#define RTE_MAX(a, b) ({ \ + typeof (a) _a = (a); \ + typeof (b) _b = (b); \ + _a > _b ? _a : _b; \ + }) + +/*********** Other general functions / macros ********/ + +#ifdef __SSE2__ +#include +/** + * PAUSE instruction for tight loops (avoid busy waiting) + */ +static inline void +rte_pause (void) +{ + _mm_pause(); +} +#else +static inline void +rte_pause(void) {} +#endif + +/** + * Searches the input parameter for the least significant set bit + * (starting from zero). + * If a least significant 1 bit is found, its bit index is returned. + * If the content of the input parameter is zero, then the content of the return + * value is undefined. + * @param v + * input parameter, should not be zero. + * @return + * least significant set bit in the input parameter. + */ +static inline uint32_t +rte_bsf32(uint32_t v) +{ + return __builtin_ctz(v); +} + +#ifndef offsetof +/** Return the offset of a field in a structure. */ +#define offsetof(TYPE, MEMBER) __builtin_offsetof (TYPE, MEMBER) +#endif + +#define _RTE_STR(x) #x +/** Take a macro value and get a string version of it */ +#define RTE_STR(x) _RTE_STR(x) + +/** Mask value of type "tp" for the first "ln" bit set. */ +#define RTE_LEN2MASK(ln, tp) \ + ((tp)((uint64_t)-1 >> (sizeof(uint64_t) * CHAR_BIT - (ln)))) + +/** Number of elements in the array. */ +#define RTE_DIM(a) (sizeof (a) / sizeof ((a)[0])) + +/** + * Converts a numeric string to the equivalent uint64_t value. + * As well as straight number conversion, also recognises the suffixes + * k, m and g for kilobytes, megabytes and gigabytes respectively. + * + * If a negative number is passed in i.e. a string with the first non-black + * character being "-", zero is returned. Zero is also returned in the case of + * an error with the strtoull call in the function. + * + * @param str + * String containing number to convert. + * @return + * Number. + */ +static inline uint64_t +rte_str_to_size(const char *str) +{ + char *endptr; + unsigned long long size; + + while (isspace((int)*str)) + str++; + if (*str == '-') + return 0; + + errno = 0; + size = strtoull(str, &endptr, 0); + if (errno) + return 0; + + if (*endptr == ' ') + endptr++; /* allow 1 space gap */ + + switch (*endptr){ + case 'G': case 'g': size *= 1024; /* fall-through */ + case 'M': case 'm': size *= 1024; /* fall-through */ + case 'K': case 'k': size *= 1024; /* fall-through */ + default: + break; + } + return size; +} + +/** + * Function to terminate the application immediately, printing an error + * message and returning the exit_code back to the shell. + * + * This function never returns + * + * @param exit_code + * The exit code to be returned by the application + * @param format + * The format string to be used for printing the message. This can include + * printf format characters which will be expanded using any further parameters + * to the function. + */ +void +rte_exit(int exit_code, const char *format, ...) + __attribute__((noreturn)) + __attribute__((format(printf, 2, 3))); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/librte_eal/common/include/rte_debug.h b/lib/librte_eal/common/include/rte_debug.h new file mode 100644 index 00000000..94129fab --- /dev/null +++ b/lib/librte_eal/common/include/rte_debug.h @@ -0,0 +1,103 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_DEBUG_H_ +#define _RTE_DEBUG_H_ + +/** + * @file + * + * Debug Functions in RTE + * + * This file defines a generic API for debug operations. Part of + * the implementation is architecture-specific. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Dump the stack of the calling core to the console. + */ +void rte_dump_stack(void); + +/** + * Dump the registers of the calling core to the console. + * + * Note: Not implemented in a userapp environment; use gdb instead. + */ +void rte_dump_registers(void); + +/** + * Provide notification of a critical non-recoverable error and terminate + * execution abnormally. + * + * Display the format string and its expanded arguments (printf-like). + * + * In a linuxapp environment, this function dumps the stack and calls + * abort() resulting in a core dump if enabled. + * + * The function never returns. + * + * @param ... + * The format string, followed by the variable list of arguments. + */ +#define rte_panic(...) rte_panic_(__func__, __VA_ARGS__, "dummy") +#define rte_panic_(func, format, ...) __rte_panic(func, format "%.0s", __VA_ARGS__) + +#define RTE_VERIFY(exp) do { \ + if (!(exp)) \ + rte_panic("line %d\tassert \"" #exp "\" failed\n", __LINE__); \ +} while (0) + +/* + * Provide notification of a critical non-recoverable error and stop. + * + * This function should not be called directly. Refer to rte_panic() macro + * documentation. + */ +void __rte_panic(const char *funcname , const char *format, ...) +#ifdef __GNUC__ +#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 2)) + __attribute__((cold)) +#endif +#endif + __attribute__((noreturn)) + __attribute__((format(printf, 2, 3))); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_DEBUG_H_ */ diff --git a/lib/librte_eal/common/include/rte_dev.h b/lib/librte_eal/common/include/rte_dev.h new file mode 100644 index 00000000..f1b55079 --- /dev/null +++ b/lib/librte_eal/common/include/rte_dev.h @@ -0,0 +1,192 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2014 6WIND S.A. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_DEV_H_ +#define _RTE_DEV_H_ + +/** + * @file + * + * RTE PMD Driver Registration Interface + * + * This file manages the list of device drivers. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +#include + +__attribute__((format(printf, 2, 0))) +static inline void +rte_pmd_debug_trace(const char *func_name, const char *fmt, ...) +{ + va_list ap; + + va_start(ap, fmt); + + char buffer[vsnprintf(NULL, 0, fmt, ap) + 1]; + + va_end(ap); + + va_start(ap, fmt); + vsnprintf(buffer, sizeof(buffer), fmt, ap); + va_end(ap); + + rte_log(RTE_LOG_ERR, RTE_LOGTYPE_PMD, "%s: %s", func_name, buffer); +} + +/* Macros for checking for restricting functions to primary instance only */ +#define RTE_PROC_PRIMARY_OR_ERR_RET(retval) do { \ + if (rte_eal_process_type() != RTE_PROC_PRIMARY) { \ + RTE_PMD_DEBUG_TRACE("Cannot run in secondary processes\n"); \ + return retval; \ + } \ +} while (0) + +#define RTE_PROC_PRIMARY_OR_RET() do { \ + if (rte_eal_process_type() != RTE_PROC_PRIMARY) { \ + RTE_PMD_DEBUG_TRACE("Cannot run in secondary processes\n"); \ + return; \ + } \ +} while (0) + +/* Macros to check for invalid function pointers */ +#define RTE_FUNC_PTR_OR_ERR_RET(func, retval) do { \ + if ((func) == NULL) { \ + RTE_PMD_DEBUG_TRACE("Function not supported\n"); \ + return retval; \ + } \ +} while (0) + +#define RTE_FUNC_PTR_OR_RET(func) do { \ + if ((func) == NULL) { \ + RTE_PMD_DEBUG_TRACE("Function not supported\n"); \ + return; \ + } \ +} while (0) + + +/** Double linked list of device drivers. */ +TAILQ_HEAD(rte_driver_list, rte_driver); + +/** + * Initialization function called for each device driver once. + */ +typedef int (rte_dev_init_t)(const char *name, const char *args); + +/** + * Uninitilization function called for each device driver once. + */ +typedef int (rte_dev_uninit_t)(const char *name); + +/** + * Driver type enumeration + */ +enum pmd_type { + PMD_VDEV = 0, + PMD_PDEV = 1, +}; + +/** + * A structure describing a device driver. + */ +struct rte_driver { + TAILQ_ENTRY(rte_driver) next; /**< Next in list. */ + enum pmd_type type; /**< PMD Driver type */ + const char *name; /**< Driver name. */ + rte_dev_init_t *init; /**< Device init. function. */ + rte_dev_uninit_t *uninit; /**< Device uninit. function. */ +}; + +/** + * Register a device driver. + * + * @param driver + * A pointer to a rte_dev structure describing the driver + * to be registered. + */ +void rte_eal_driver_register(struct rte_driver *driver); + +/** + * Unregister a device driver. + * + * @param driver + * A pointer to a rte_dev structure describing the driver + * to be unregistered. + */ +void rte_eal_driver_unregister(struct rte_driver *driver); + +/** + * Initalize all the registered drivers in this process + */ +int rte_eal_dev_init(void); + +/** + * Initialize a driver specified by name. + * + * @param name + * The pointer to a driver name to be initialized. + * @param args + * The pointer to arguments used by driver initialization. + * @return + * 0 on success, negative on error + */ +int rte_eal_vdev_init(const char *name, const char *args); + +/** + * Uninitalize a driver specified by name. + * + * @param name + * The pointer to a driver name to be initialized. + * @return + * 0 on success, negative on error + */ +int rte_eal_vdev_uninit(const char *name); + +#define PMD_REGISTER_DRIVER(d)\ +void devinitfn_ ##d(void);\ +void __attribute__((constructor, used)) devinitfn_ ##d(void)\ +{\ + rte_eal_driver_register(&d);\ +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_VDEV_H_ */ diff --git a/lib/librte_eal/common/include/rte_devargs.h b/lib/librte_eal/common/include/rte_devargs.h new file mode 100644 index 00000000..53c59f56 --- /dev/null +++ b/lib/librte_eal/common/include/rte_devargs.h @@ -0,0 +1,177 @@ +/*- + * BSD LICENSE + * + * Copyright 2014 6WIND S.A. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A nor the names of its contributors + * may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_DEVARGS_H_ +#define _RTE_DEVARGS_H_ + +/** + * @file + * + * RTE devargs: list of devices and their user arguments + * + * This file stores a list of devices and their arguments given by + * the user when a DPDK application is started. These devices can be PCI + * devices or virtual devices. These devices are stored at startup in a + * list of rte_devargs structures. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +/** + * Type of generic device + */ +enum rte_devtype { + RTE_DEVTYPE_WHITELISTED_PCI, + RTE_DEVTYPE_BLACKLISTED_PCI, + RTE_DEVTYPE_VIRTUAL, +}; + +/** + * Structure that stores a device given by the user with its arguments + * + * A user device is a physical or a virtual device given by the user to + * the DPDK application at startup through command line arguments. + * + * The structure stores the configuration of the device, its PCI + * identifier if it's a PCI device or the driver name if it's a virtual + * device. + */ +struct rte_devargs { + /** Next in list. */ + TAILQ_ENTRY(rte_devargs) next; + /** Type of device. */ + enum rte_devtype type; + union { + /** Used if type is RTE_DEVTYPE_*_PCI. */ + struct { + /** PCI location. */ + struct rte_pci_addr addr; + } pci; + /** Used if type is RTE_DEVTYPE_VIRTUAL. */ + struct { + /** Driver name. */ + char drv_name[32]; + } virt; + }; + /** Arguments string as given by user or "" for no argument. */ + char *args; +}; + +/** user device double-linked queue type definition */ +TAILQ_HEAD(rte_devargs_list, rte_devargs); + +/** Global list of user devices */ +extern struct rte_devargs_list devargs_list; + +/** + * Parse a devargs string. + * + * For PCI devices, the format of arguments string is "PCI_ADDR" or + * "PCI_ADDR,key=val,key2=val2,...". Examples: "08:00.1", "0000:5:00.0", + * "04:00.0,arg=val". + * + * For virtual devices, the format of arguments string is "DRIVER_NAME*" + * or "DRIVER_NAME*,key=val,key2=val2,...". Examples: "eth_ring", + * "eth_ring0", "eth_pmdAnything,arg=0:arg2=1". + * + * The function parses the arguments string to get driver name and driver + * arguments. + * + * @param devargs_str + * The arguments as given by the user. + * @param drvname + * The pointer to the string to store parsed driver name. + * @param drvargs + * The pointer to the string to store parsed driver arguments. + * + * @return + * - 0 on success + * - A negative value on error + */ +int rte_eal_parse_devargs_str(const char *devargs_str, + char **drvname, char **drvargs); + +/** + * Add a device to the user device list + * + * For PCI devices, the format of arguments string is "PCI_ADDR" or + * "PCI_ADDR,key=val,key2=val2,...". Examples: "08:00.1", "0000:5:00.0", + * "04:00.0,arg=val". + * + * For virtual devices, the format of arguments string is "DRIVER_NAME*" + * or "DRIVER_NAME*,key=val,key2=val2,...". Examples: "eth_ring", + * "eth_ring0", "eth_pmdAnything,arg=0:arg2=1". The validity of the + * driver name is not checked by this function, it is done when probing + * the drivers. + * + * @param devtype + * The type of the device. + * @param devargs_str + * The arguments as given by the user. + * + * @return + * - 0 on success + * - A negative value on error + */ +int rte_eal_devargs_add(enum rte_devtype devtype, const char *devargs_str); + +/** + * Count the number of user devices of a specified type + * + * @param devtype + * The type of the devices to counted. + * + * @return + * The number of devices. + */ +unsigned int +rte_eal_devargs_type_count(enum rte_devtype devtype); + +/** + * This function dumps the list of user device and their arguments. + * + * @param f + * A pointer to a file for output + */ +void rte_eal_devargs_dump(FILE *f); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_DEVARGS_H_ */ diff --git a/lib/librte_eal/common/include/rte_eal.h b/lib/librte_eal/common/include/rte_eal.h new file mode 100644 index 00000000..a71d6f57 --- /dev/null +++ b/lib/librte_eal/common/include/rte_eal.h @@ -0,0 +1,259 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_EAL_H_ +#define _RTE_EAL_H_ + +/** + * @file + * + * EAL Configuration API + */ + +#include +#include + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define RTE_MAGIC 19820526 /**< Magic number written by the main partition when ready. */ + +/* Maximum thread_name length. */ +#define RTE_MAX_THREAD_NAME_LEN 16 + +/** + * The lcore role (used in RTE or not). + */ +enum rte_lcore_role_t { + ROLE_RTE, + ROLE_OFF, +}; + +/** + * The type of process in a linuxapp, multi-process setup + */ +enum rte_proc_type_t { + RTE_PROC_AUTO = -1, /* allow auto-detection of primary/secondary */ + RTE_PROC_PRIMARY = 0, /* set to zero, so primary is the default */ + RTE_PROC_SECONDARY, + + RTE_PROC_INVALID +}; + +/** + * The global RTE configuration structure. + */ +struct rte_config { + uint32_t master_lcore; /**< Id of the master lcore */ + uint32_t lcore_count; /**< Number of available logical cores. */ + enum rte_lcore_role_t lcore_role[RTE_MAX_LCORE]; /**< State of cores. */ + + /** Primary or secondary configuration */ + enum rte_proc_type_t process_type; + + /** + * Pointer to memory configuration, which may be shared across multiple + * DPDK instances + */ + struct rte_mem_config *mem_config; +} __attribute__((__packed__)); + +/** + * Get the global configuration structure. + * + * @return + * A pointer to the global configuration structure. + */ +struct rte_config *rte_eal_get_configuration(void); + +/** + * Get a lcore's role. + * + * @param lcore_id + * The identifier of the lcore. + * @return + * The role of the lcore. + */ +enum rte_lcore_role_t rte_eal_lcore_role(unsigned lcore_id); + + +/** + * Get the process type in a multi-process setup + * + * @return + * The process type + */ +enum rte_proc_type_t rte_eal_process_type(void); + +/** + * Request iopl privilege for all RPL. + * + * This function should be called by pmds which need access to ioports. + + * @return + * - On success, returns 0. + * - On failure, returns -1. + */ +int rte_eal_iopl_init(void); + +/** + * Initialize the Environment Abstraction Layer (EAL). + * + * This function is to be executed on the MASTER lcore only, as soon + * as possible in the application's main() function. + * + * The function finishes the initialization process before main() is called. + * It puts the SLAVE lcores in the WAIT state. + * + * When the multi-partition feature is supported, depending on the + * configuration (if CONFIG_RTE_EAL_MAIN_PARTITION is disabled), this + * function waits to ensure that the magic number is set before + * returning. See also the rte_eal_get_configuration() function. Note: + * This behavior may change in the future. + * + * @param argc + * The argc argument that was given to the main() function. + * @param argv + * The argv argument that was given to the main() function. + * @return + * - On success, the number of parsed arguments, which is greater or + * equal to zero. After the call to rte_eal_init(), + * all arguments argv[x] with x < ret may be modified and should + * not be accessed by the application. + * - On failure, a negative error value. + */ +int rte_eal_init(int argc, char **argv); + +/** + * Check if a primary process is currently alive + * + * This function returns true when a primary process is currently + * active. + * + * @param config_file_path + * The config_file_path argument provided should point at the location + * that the primary process will create its config file. If NULL, the default + * config file path is used. + * + * @return + * - If alive, returns 1. + * - If dead, returns 0. + */ +int rte_eal_primary_proc_alive(const char *config_file_path); + +/** + * Usage function typedef used by the application usage function. + * + * Use this function typedef to define and call rte_set_applcation_usage_hook() + * routine. + */ +typedef void (*rte_usage_hook_t)(const char * prgname); + +/** + * Add application usage routine callout from the eal_usage() routine. + * + * This function allows the application to include its usage message + * in the EAL system usage message. The routine rte_set_application_usage_hook() + * needs to be called before the rte_eal_init() routine in the application. + * + * This routine is optional for the application and will behave as if the set + * routine was never called as the default behavior. + * + * @param usage_func + * The func argument is a function pointer to the application usage routine. + * Called function is defined using rte_usage_hook_t typedef, which is of + * the form void rte_usage_func(const char * prgname). + * + * Calling this routine with a NULL value will reset the usage hook routine and + * return the current value, which could be NULL. + * @return + * - Returns the current value of the rte_application_usage pointer to allow + * the caller to daisy chain the usage routines if needing more then one. + */ +rte_usage_hook_t +rte_set_application_usage_hook(rte_usage_hook_t usage_func); + +/** + * macro to get the lock of tailq in mem_config + */ +#define RTE_EAL_TAILQ_RWLOCK (&rte_eal_get_configuration()->mem_config->qlock) + +/** + * macro to get the multiple lock of mempool shared by mutiple-instance + */ +#define RTE_EAL_MEMPOOL_RWLOCK (&rte_eal_get_configuration()->mem_config->mplock) + +/** + * Whether EAL is using huge pages (disabled by --no-huge option). + * The no-huge mode cannot be used with UIO poll-mode drivers like igb/ixgbe. + * It is useful for NIC drivers (e.g. librte_pmd_mlx4, librte_pmd_vmxnet3) or + * crypto drivers (e.g. librte_crypto_nitrox) provided by third-parties such + * as 6WIND. + * + * @return + * Nonzero if hugepages are enabled. + */ +int rte_eal_has_hugepages(void); + +/** + * A wrap API for syscall gettid. + * + * @return + * On success, returns the thread ID of calling process. + * It is always successful. + */ +int rte_sys_gettid(void); + +/** + * Get system unique thread id. + * + * @return + * On success, returns the thread ID of calling process. + * It is always successful. + */ +static inline int rte_gettid(void) +{ + static RTE_DEFINE_PER_LCORE(int, _thread_id) = -1; + if (RTE_PER_LCORE(_thread_id) == -1) + RTE_PER_LCORE(_thread_id) = rte_sys_gettid(); + return RTE_PER_LCORE(_thread_id); +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_EAL_H_ */ diff --git a/lib/librte_eal/common/include/rte_eal_memconfig.h b/lib/librte_eal/common/include/rte_eal_memconfig.h new file mode 100644 index 00000000..2b5e0b17 --- /dev/null +++ b/lib/librte_eal/common/include/rte_eal_memconfig.h @@ -0,0 +1,100 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_EAL_MEMCONFIG_H_ +#define _RTE_EAL_MEMCONFIG_H_ + +#include +#include +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * the structure for the memory configuration for the RTE. + * Used by the rte_config structure. It is separated out, as for multi-process + * support, the memory details should be shared across instances + */ +struct rte_mem_config { + volatile uint32_t magic; /**< Magic number - Sanity check. */ + + /* memory topology */ + uint32_t nchannel; /**< Number of channels (0 if unknown). */ + uint32_t nrank; /**< Number of ranks (0 if unknown). */ + + /** + * current lock nest order + * - qlock->mlock (ring/hash/lpm) + * - mplock->qlock->mlock (mempool) + * Notice: + * *ALWAYS* obtain qlock first if having to obtain both qlock and mlock + */ + rte_rwlock_t mlock; /**< only used by memzone LIB for thread-safe. */ + rte_rwlock_t qlock; /**< used for tailq operation for thread safe. */ + rte_rwlock_t mplock; /**< only used by mempool LIB for thread-safe. */ + + uint32_t memzone_cnt; /**< Number of allocated memzones */ + + /* memory segments and zones */ + struct rte_memseg memseg[RTE_MAX_MEMSEG]; /**< Physmem descriptors. */ + struct rte_memzone memzone[RTE_MAX_MEMZONE]; /**< Memzone descriptors. */ + + struct rte_tailq_head tailq_head[RTE_MAX_TAILQ]; /**< Tailqs for objects */ + + /* Heaps of Malloc per socket */ + struct malloc_heap malloc_heaps[RTE_MAX_NUMA_NODES]; + + /* address of mem_config in primary process. used to map shared config into + * exact same address the primary process maps it. + */ + uint64_t mem_cfg_addr; +} __attribute__((__packed__)); + + +inline static void +rte_eal_mcfg_wait_complete(struct rte_mem_config* mcfg) +{ + /* wait until shared mem_config finish initialising */ + while(mcfg->magic != RTE_MAGIC) + rte_pause(); +} + +#ifdef __cplusplus +} +#endif + +#endif /*__RTE_EAL_MEMCONFIG_H_*/ diff --git a/lib/librte_eal/common/include/rte_errno.h b/lib/librte_eal/common/include/rte_errno.h new file mode 100644 index 00000000..2e5cc454 --- /dev/null +++ b/lib/librte_eal/common/include/rte_errno.h @@ -0,0 +1,95 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * + * API for error cause tracking + */ + +#ifndef _RTE_ERRNO_H_ +#define _RTE_ERRNO_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +RTE_DECLARE_PER_LCORE(int, _rte_errno); /**< Per core error number. */ + +/** + * Error number value, stored per-thread, which can be queried after + * calls to certain functions to determine why those functions failed. + * + * Uses standard values from errno.h wherever possible, with a small number + * of additional possible values for RTE-specific conditions. + */ +#define rte_errno RTE_PER_LCORE(_rte_errno) + +/** + * Function which returns a printable string describing a particular + * error code. For non-RTE-specific error codes, this function returns + * the value from the libc strerror function. + * + * @param errnum + * The error number to be looked up - generally the value of rte_errno + * @return + * A pointer to a thread-local string containing the text describing + * the error. + */ +const char *rte_strerror(int errnum); + +#ifndef __ELASTERROR +/** + * Check if we have a defined value for the max system-defined errno values. + * if no max defined, start from 1000 to prevent overlap with standard values + */ +#define __ELASTERROR 1000 +#endif + +/** Error types */ +enum { + RTE_MIN_ERRNO = __ELASTERROR, /**< Start numbering above std errno vals */ + + E_RTE_SECONDARY, /**< Operation not allowed in secondary processes */ + E_RTE_NO_CONFIG, /**< Missing rte_config */ + + RTE_MAX_ERRNO /**< Max RTE error number */ +}; + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_ERRNO_H_ */ diff --git a/lib/librte_eal/common/include/rte_hexdump.h b/lib/librte_eal/common/include/rte_hexdump.h new file mode 100644 index 00000000..5c18a50b --- /dev/null +++ b/lib/librte_eal/common/include/rte_hexdump.h @@ -0,0 +1,89 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_HEXDUMP_H_ +#define _RTE_HEXDUMP_H_ + +/** + * @file + * Simple API to dump out memory in a special hex format. + */ + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** +* Dump out memory in a special hex dump format. +* +* @param f +* A pointer to a file for output +* @param title +* If not NULL this string is printed as a header to the output. +* @param buf +* This is the buffer address to print out. +* @param len +* The number of bytes to dump out +* @return +* None. +*/ + +extern void +rte_hexdump(FILE *f, const char * title, const void * buf, unsigned int len); + +/** +* Dump out memory in a hex format with colons between bytes. +* +* @param f +* A pointer to a file for output +* @param title +* If not NULL this string is printed as a header to the output. +* @param buf +* This is the buffer address to print out. +* @param len +* The number of bytes to dump out +* @return +* None. +*/ + +void +rte_memdump(FILE *f, const char * title, const void * buf, unsigned int len); + + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_HEXDUMP_H_ */ diff --git a/lib/librte_eal/common/include/rte_interrupts.h b/lib/librte_eal/common/include/rte_interrupts.h new file mode 100644 index 00000000..ff11ef3a --- /dev/null +++ b/lib/librte_eal/common/include/rte_interrupts.h @@ -0,0 +1,120 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_INTERRUPTS_H_ +#define _RTE_INTERRUPTS_H_ + +/** + * @file + * + * The RTE interrupt interface provides functions to register/unregister + * callbacks for a specific interrupt. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/** Interrupt handle */ +struct rte_intr_handle; + +/** Function to be registered for the specific interrupt */ +typedef void (*rte_intr_callback_fn)(struct rte_intr_handle *intr_handle, + void *cb_arg); + +#include + +/** + * It registers the callback for the specific interrupt. Multiple + * callbacks cal be registered at the same time. + * @param intr_handle + * Pointer to the interrupt handle. + * @param cb + * callback address. + * @param cb_arg + * address of parameter for callback. + * + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int rte_intr_callback_register(struct rte_intr_handle *intr_handle, + rte_intr_callback_fn cb, void *cb_arg); + +/** + * It unregisters the callback according to the specified interrupt handle. + * + * @param intr_handle + * pointer to the interrupt handle. + * @param cb + * callback address. + * @param cb_arg + * address of parameter for callback, (void *)-1 means to remove all + * registered which has the same callback address. + * + * @return + * - On success, return the number of callback entities removed. + * - On failure, a negative value. + */ +int rte_intr_callback_unregister(struct rte_intr_handle *intr_handle, + rte_intr_callback_fn cb, void *cb_arg); + +/** + * It enables the interrupt for the specified handle. + * + * @param intr_handle + * pointer to the interrupt handle. + * + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int rte_intr_enable(struct rte_intr_handle *intr_handle); + +/** + * It disables the interrupt for the specified handle. + * + * @param intr_handle + * pointer to the interrupt handle. + * + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int rte_intr_disable(struct rte_intr_handle *intr_handle); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/lib/librte_eal/common/include/rte_keepalive.h b/lib/librte_eal/common/include/rte_keepalive.h new file mode 100644 index 00000000..10dac2e0 --- /dev/null +++ b/lib/librte_eal/common/include/rte_keepalive.h @@ -0,0 +1,108 @@ +/*- + * BSD LICENSE + * + * Copyright 2015 Intel Shannon Ltd. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file rte_keepalive.h + * DPDK RTE LCore Keepalive Monitor. + * + **/ + +#ifndef _KEEPALIVE_H_ +#define _KEEPALIVE_H_ + +#include + +#ifndef RTE_KEEPALIVE_MAXCORES +/** + * Number of cores to track. + * @note Must be larger than the highest core id. */ +#define RTE_KEEPALIVE_MAXCORES RTE_MAX_LCORE +#endif + +/** + * Keepalive failure callback. + * + * Receives a data pointer passed to rte_keepalive_create() and the id of the + * failed core. + */ +typedef void (*rte_keepalive_failure_callback_t)( + void *data, + const int id_core); + +/** + * Keepalive state structure. + * @internal + */ +struct rte_keepalive; + +/** + * Initialise keepalive sub-system. + * @param callback + * Function called upon detection of a dead core. + * @param data + * Data pointer to be passed to function callback. + * @return + * Keepalive structure success, NULL on failure. + */ +struct rte_keepalive *rte_keepalive_create( + rte_keepalive_failure_callback_t callback, + void *data); + +/** + * Checks & handles keepalive state of monitored cores. + * @param *ptr_timer Triggering timer (unused) + * @param *ptr_data Data pointer (keepalive structure) + */ +void rte_keepalive_dispatch_pings(void *ptr_timer, void *ptr_data); + +/** + * Registers a core for keepalive checks. + * @param *keepcfg + * Keepalive structure pointer + * @param id_core + * ID number of core to register. + */ +void rte_keepalive_register_core(struct rte_keepalive *keepcfg, + const int id_core); + +/** + * Per-core keepalive check. + * @param *keepcfg + * Keepalive structure pointer + * + * This function needs to be called from within the main process loop of + * the LCore to be checked. + */ +void +rte_keepalive_mark_alive(struct rte_keepalive *keepcfg); + +#endif /* _KEEPALIVE_H_ */ diff --git a/lib/librte_eal/common/include/rte_launch.h b/lib/librte_eal/common/include/rte_launch.h new file mode 100644 index 00000000..dd1946da --- /dev/null +++ b/lib/librte_eal/common/include/rte_launch.h @@ -0,0 +1,177 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_LAUNCH_H_ +#define _RTE_LAUNCH_H_ + +/** + * @file + * + * Launch tasks on other lcores + */ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * State of an lcore. + */ +enum rte_lcore_state_t { + WAIT, /**< waiting a new command */ + RUNNING, /**< executing command */ + FINISHED, /**< command executed */ +}; + +/** + * Definition of a remote launch function. + */ +typedef int (lcore_function_t)(void *); + +/** + * Launch a function on another lcore. + * + * To be executed on the MASTER lcore only. + * + * Sends a message to a slave lcore (identified by the slave_id) that + * is in the WAIT state (this is true after the first call to + * rte_eal_init()). This can be checked by first calling + * rte_eal_wait_lcore(slave_id). + * + * When the remote lcore receives the message, it switches to + * the RUNNING state, then calls the function f with argument arg. Once the + * execution is done, the remote lcore switches to a FINISHED state and + * the return value of f is stored in a local variable to be read using + * rte_eal_wait_lcore(). + * + * The MASTER lcore returns as soon as the message is sent and knows + * nothing about the completion of f. + * + * Note: This function is not designed to offer optimum + * performance. It is just a practical way to launch a function on + * another lcore at initialization time. + * + * @param f + * The function to be called. + * @param arg + * The argument for the function. + * @param slave_id + * The identifier of the lcore on which the function should be executed. + * @return + * - 0: Success. Execution of function f started on the remote lcore. + * - (-EBUSY): The remote lcore is not in a WAIT state. + */ +int rte_eal_remote_launch(lcore_function_t *f, void *arg, unsigned slave_id); + +/** + * This enum indicates whether the master core must execute the handler + * launched on all logical cores. + */ +enum rte_rmt_call_master_t { + SKIP_MASTER = 0, /**< lcore handler not executed by master core. */ + CALL_MASTER, /**< lcore handler executed by master core. */ +}; + +/** + * Launch a function on all lcores. + * + * Check that each SLAVE lcore is in a WAIT state, then call + * rte_eal_remote_launch() for each lcore. + * + * @param f + * The function to be called. + * @param arg + * The argument for the function. + * @param call_master + * If call_master set to SKIP_MASTER, the MASTER lcore does not call + * the function. If call_master is set to CALL_MASTER, the function + * is also called on master before returning. In any case, the master + * lcore returns as soon as it finished its job and knows nothing + * about the completion of f on the other lcores. + * @return + * - 0: Success. Execution of function f started on all remote lcores. + * - (-EBUSY): At least one remote lcore is not in a WAIT state. In this + * case, no message is sent to any of the lcores. + */ +int rte_eal_mp_remote_launch(lcore_function_t *f, void *arg, + enum rte_rmt_call_master_t call_master); + +/** + * Get the state of the lcore identified by slave_id. + * + * To be executed on the MASTER lcore only. + * + * @param slave_id + * The identifier of the lcore. + * @return + * The state of the lcore. + */ +enum rte_lcore_state_t rte_eal_get_lcore_state(unsigned slave_id); + +/** + * Wait until an lcore finishes its job. + * + * To be executed on the MASTER lcore only. + * + * If the slave lcore identified by the slave_id is in a FINISHED state, + * switch to the WAIT state. If the lcore is in RUNNING state, wait until + * the lcore finishes its job and moves to the FINISHED state. + * + * @param slave_id + * The identifier of the lcore. + * @return + * - 0: If the lcore identified by the slave_id is in a WAIT state. + * - The value that was returned by the previous remote launch + * function call if the lcore identified by the slave_id was in a + * FINISHED or RUNNING state. In this case, it changes the state + * of the lcore to WAIT. + */ +int rte_eal_wait_lcore(unsigned slave_id); + +/** + * Wait until all lcores finish their jobs. + * + * To be executed on the MASTER lcore only. Issue an + * rte_eal_wait_lcore() for every lcore. The return values are + * ignored. + * + * After a call to rte_eal_mp_wait_lcore(), the caller can assume + * that all slave lcores are in a WAIT state. + */ +void rte_eal_mp_wait_lcore(void); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_LAUNCH_H_ */ diff --git a/lib/librte_eal/common/include/rte_lcore.h b/lib/librte_eal/common/include/rte_lcore.h new file mode 100644 index 00000000..ac151302 --- /dev/null +++ b/lib/librte_eal/common/include/rte_lcore.h @@ -0,0 +1,276 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_LCORE_H_ +#define _RTE_LCORE_H_ + +/** + * @file + * + * API for lcore and socket manipulation + * + */ +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define LCORE_ID_ANY UINT32_MAX /**< Any lcore. */ + +#if defined(__linux__) + typedef cpu_set_t rte_cpuset_t; +#elif defined(__FreeBSD__) +#include + typedef cpuset_t rte_cpuset_t; +#endif + +/** + * Structure storing internal configuration (per-lcore) + */ +struct lcore_config { + unsigned detected; /**< true if lcore was detected */ + pthread_t thread_id; /**< pthread identifier */ + int pipe_master2slave[2]; /**< communication pipe with master */ + int pipe_slave2master[2]; /**< communication pipe with master */ + lcore_function_t * volatile f; /**< function to call */ + void * volatile arg; /**< argument of function */ + volatile int ret; /**< return value of function */ + volatile enum rte_lcore_state_t state; /**< lcore state */ + unsigned socket_id; /**< physical socket id for this lcore */ + unsigned core_id; /**< core number on socket for this lcore */ + int core_index; /**< relative index, starting from 0 */ + rte_cpuset_t cpuset; /**< cpu set which the lcore affinity to */ +}; + +/** + * Internal configuration (per-lcore) + */ +extern struct lcore_config lcore_config[RTE_MAX_LCORE]; + +RTE_DECLARE_PER_LCORE(unsigned, _lcore_id); /**< Per thread "lcore id". */ +RTE_DECLARE_PER_LCORE(rte_cpuset_t, _cpuset); /**< Per thread "cpuset". */ + +/** + * Return the ID of the execution unit we are running on. + * @return + * Logical core ID (in EAL thread) or LCORE_ID_ANY (in non-EAL thread) + */ +static inline unsigned +rte_lcore_id(void) +{ + return RTE_PER_LCORE(_lcore_id); +} + +/** + * Get the id of the master lcore + * + * @return + * the id of the master lcore + */ +static inline unsigned +rte_get_master_lcore(void) +{ + return rte_eal_get_configuration()->master_lcore; +} + +/** + * Return the number of execution units (lcores) on the system. + * + * @return + * the number of execution units (lcores) on the system. + */ +static inline unsigned +rte_lcore_count(void) +{ + const struct rte_config *cfg = rte_eal_get_configuration(); + return cfg->lcore_count; +} + +/** + * Return the index of the lcore starting from zero. + * The order is physical or given by command line (-l option). + * + * @param lcore_id + * The targeted lcore, or -1 for the current one. + * @return + * The relative index, or -1 if not enabled. + */ +static inline int +rte_lcore_index(int lcore_id) +{ + if (lcore_id >= RTE_MAX_LCORE) + return -1; + if (lcore_id < 0) + lcore_id = rte_lcore_id(); + return lcore_config[lcore_id].core_index; +} + +/** + * Return the ID of the physical socket of the logical core we are + * running on. + * @return + * the ID of current lcoreid's physical socket + */ +unsigned rte_socket_id(void); + +/** + * Get the ID of the physical socket of the specified lcore + * + * @param lcore_id + * the targeted lcore, which MUST be between 0 and RTE_MAX_LCORE-1. + * @return + * the ID of lcoreid's physical socket + */ +static inline unsigned +rte_lcore_to_socket_id(unsigned lcore_id) +{ + return lcore_config[lcore_id].socket_id; +} + +/** + * Test if an lcore is enabled. + * + * @param lcore_id + * The identifier of the lcore, which MUST be between 0 and + * RTE_MAX_LCORE-1. + * @return + * True if the given lcore is enabled; false otherwise. + */ +static inline int +rte_lcore_is_enabled(unsigned lcore_id) +{ + struct rte_config *cfg = rte_eal_get_configuration(); + if (lcore_id >= RTE_MAX_LCORE) + return 0; + return cfg->lcore_role[lcore_id] != ROLE_OFF; +} + +/** + * Get the next enabled lcore ID. + * + * @param i + * The current lcore (reference). + * @param skip_master + * If true, do not return the ID of the master lcore. + * @param wrap + * If true, go back to 0 when RTE_MAX_LCORE is reached; otherwise, + * return RTE_MAX_LCORE. + * @return + * The next lcore_id or RTE_MAX_LCORE if not found. + */ +static inline unsigned +rte_get_next_lcore(unsigned i, int skip_master, int wrap) +{ + i++; + if (wrap) + i %= RTE_MAX_LCORE; + + while (i < RTE_MAX_LCORE) { + if (!rte_lcore_is_enabled(i) || + (skip_master && (i == rte_get_master_lcore()))) { + i++; + if (wrap) + i %= RTE_MAX_LCORE; + continue; + } + break; + } + return i; +} +/** + * Macro to browse all running lcores. + */ +#define RTE_LCORE_FOREACH(i) \ + for (i = rte_get_next_lcore(-1, 0, 0); \ + i= 2.12 supports this feature. + * + * This macro only used for Linux, BSD does direct libc call. + * BSD libc version of function is `pthread_set_name_np()`. + */ +#if defined(__DOXYGEN__) +#define rte_thread_setname(...) pthread_setname_np(__VA_ARGS__) +#endif + +#if defined(__GLIBC__) && defined(__GLIBC_PREREQ) +#if __GLIBC_PREREQ(2, 12) +#define rte_thread_setname(...) pthread_setname_np(__VA_ARGS__) +#else +#define rte_thread_setname(...) 0 +#endif +#endif + +#ifdef __cplusplus +} +#endif + + +#endif /* _RTE_LCORE_H_ */ diff --git a/lib/librte_eal/common/include/rte_log.h b/lib/librte_eal/common/include/rte_log.h new file mode 100644 index 00000000..2e47e7f6 --- /dev/null +++ b/lib/librte_eal/common/include/rte_log.h @@ -0,0 +1,311 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_LOG_H_ +#define _RTE_LOG_H_ + +/** + * @file + * + * RTE Logs API + * + * This file provides a log API to RTE applications. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +/** The rte_log structure. */ +struct rte_logs { + uint32_t type; /**< Bitfield with enabled logs. */ + uint32_t level; /**< Log level. */ + FILE *file; /**< Pointer to current FILE* for logs. */ +}; + +/** Global log informations */ +extern struct rte_logs rte_logs; + +/* SDK log type */ +#define RTE_LOGTYPE_EAL 0x00000001 /**< Log related to eal. */ +#define RTE_LOGTYPE_MALLOC 0x00000002 /**< Log related to malloc. */ +#define RTE_LOGTYPE_RING 0x00000004 /**< Log related to ring. */ +#define RTE_LOGTYPE_MEMPOOL 0x00000008 /**< Log related to mempool. */ +#define RTE_LOGTYPE_TIMER 0x00000010 /**< Log related to timers. */ +#define RTE_LOGTYPE_PMD 0x00000020 /**< Log related to poll mode driver. */ +#define RTE_LOGTYPE_HASH 0x00000040 /**< Log related to hash table. */ +#define RTE_LOGTYPE_LPM 0x00000080 /**< Log related to LPM. */ +#define RTE_LOGTYPE_KNI 0x00000100 /**< Log related to KNI. */ +#define RTE_LOGTYPE_ACL 0x00000200 /**< Log related to ACL. */ +#define RTE_LOGTYPE_POWER 0x00000400 /**< Log related to power. */ +#define RTE_LOGTYPE_METER 0x00000800 /**< Log related to QoS meter. */ +#define RTE_LOGTYPE_SCHED 0x00001000 /**< Log related to QoS port scheduler. */ +#define RTE_LOGTYPE_PORT 0x00002000 /**< Log related to port. */ +#define RTE_LOGTYPE_TABLE 0x00004000 /**< Log related to table. */ +#define RTE_LOGTYPE_PIPELINE 0x00008000 /**< Log related to pipeline. */ +#define RTE_LOGTYPE_MBUF 0x00010000 /**< Log related to mbuf. */ +#define RTE_LOGTYPE_CRYPTODEV 0x00020000 /**< Log related to cryptodev. */ + +/* these log types can be used in an application */ +#define RTE_LOGTYPE_USER1 0x01000000 /**< User-defined log type 1. */ +#define RTE_LOGTYPE_USER2 0x02000000 /**< User-defined log type 2. */ +#define RTE_LOGTYPE_USER3 0x04000000 /**< User-defined log type 3. */ +#define RTE_LOGTYPE_USER4 0x08000000 /**< User-defined log type 4. */ +#define RTE_LOGTYPE_USER5 0x10000000 /**< User-defined log type 5. */ +#define RTE_LOGTYPE_USER6 0x20000000 /**< User-defined log type 6. */ +#define RTE_LOGTYPE_USER7 0x40000000 /**< User-defined log type 7. */ +#define RTE_LOGTYPE_USER8 0x80000000 /**< User-defined log type 8. */ + +/* Can't use 0, as it gives compiler warnings */ +#define RTE_LOG_EMERG 1U /**< System is unusable. */ +#define RTE_LOG_ALERT 2U /**< Action must be taken immediately. */ +#define RTE_LOG_CRIT 3U /**< Critical conditions. */ +#define RTE_LOG_ERR 4U /**< Error conditions. */ +#define RTE_LOG_WARNING 5U /**< Warning conditions. */ +#define RTE_LOG_NOTICE 6U /**< Normal but significant condition. */ +#define RTE_LOG_INFO 7U /**< Informational. */ +#define RTE_LOG_DEBUG 8U /**< Debug-level messages. */ + +/** The default log stream. */ +extern FILE *eal_default_log_stream; + +/** + * Change the stream that will be used by the logging system. + * + * This can be done at any time. The f argument represents the stream + * to be used to send the logs. If f is NULL, the default output is + * used (stderr). + * + * @param f + * Pointer to the stream. + * @return + * - 0 on success. + * - Negative on error. + */ +int rte_openlog_stream(FILE *f); + +/** + * Set the global log level. + * + * After this call, all logs that are lower or equal than level and + * lower or equal than the RTE_LOG_LEVEL configuration option will be + * displayed. + * + * @param level + * Log level. A value between RTE_LOG_EMERG (1) and RTE_LOG_DEBUG (8). + */ +void rte_set_log_level(uint32_t level); + +/** + * Get the global log level. + */ +uint32_t rte_get_log_level(void); + +/** + * Enable or disable the log type. + * + * @param type + * Log type, for example, RTE_LOGTYPE_EAL. + * @param enable + * True for enable; false for disable. + */ +void rte_set_log_type(uint32_t type, int enable); + +/** + * Get the global log type. + */ +uint32_t rte_get_log_type(void); + +/** + * Get the current loglevel for the message being processed. + * + * Before calling the user-defined stream for logging, the log + * subsystem sets a per-lcore variable containing the loglevel and the + * logtype of the message being processed. This information can be + * accessed by the user-defined log output function through this + * function. + * + * @return + * The loglevel of the message being processed. + */ +int rte_log_cur_msg_loglevel(void); + +/** + * Get the current logtype for the message being processed. + * + * Before calling the user-defined stream for logging, the log + * subsystem sets a per-lcore variable containing the loglevel and the + * logtype of the message being processed. This information can be + * accessed by the user-defined log output function through this + * function. + * + * @return + * The logtype of the message being processed. + */ +int rte_log_cur_msg_logtype(void); + +/** + * Enable or disable the history (enabled by default) + * + * @param enable + * true to enable, or 0 to disable history. + */ +void rte_log_set_history(int enable); + +/** + * Dump the log history to a file + * + * @param f + * A pointer to a file for output + */ +void rte_log_dump_history(FILE *f); + +/** + * Add a log message to the history. + * + * This function can be called from a user-defined log stream. It adds + * the given message in the history that can be dumped using + * rte_log_dump_history(). + * + * @param buf + * A data buffer containing the message to be saved in the history. + * @param size + * The length of the data buffer. + * @return + * - 0: Success. + * - (-ENOBUFS) if there is no room to store the message. + */ +int rte_log_add_in_history(const char *buf, size_t size); + +/** + * Generates a log message. + * + * The message will be sent in the stream defined by the previous call + * to rte_openlog_stream(). + * + * The level argument determines if the log should be displayed or + * not, depending on the global rte_logs variable. + * + * The preferred alternative is the RTE_LOG() function because debug logs may + * be removed at compilation time if optimization is enabled. Moreover, + * logs are automatically prefixed by type when using the macro. + * + * @param level + * Log level. A value between RTE_LOG_EMERG (1) and RTE_LOG_DEBUG (8). + * @param logtype + * The log type, for example, RTE_LOGTYPE_EAL. + * @param format + * The format string, as in printf(3), followed by the variable arguments + * required by the format. + * @return + * - 0: Success. + * - Negative on error. + */ +int rte_log(uint32_t level, uint32_t logtype, const char *format, ...) +#ifdef __GNUC__ +#if (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ > 2)) + __attribute__((cold)) +#endif +#endif + __attribute__((format(printf, 3, 4))); + +/** + * Generates a log message. + * + * The message will be sent in the stream defined by the previous call + * to rte_openlog_stream(). + * + * The level argument determines if the log should be displayed or + * not, depending on the global rte_logs variable. A trailing + * newline may be added if needed. + * + * The preferred alternative is the RTE_LOG() because debug logs may be + * removed at compilation time. + * + * @param level + * Log level. A value between RTE_LOG_EMERG (1) and RTE_LOG_DEBUG (8). + * @param logtype + * The log type, for example, RTE_LOGTYPE_EAL. + * @param format + * The format string, as in printf(3), followed by the variable arguments + * required by the format. + * @param ap + * The va_list of the variable arguments required by the format. + * @return + * - 0: Success. + * - Negative on error. + */ +int rte_vlog(uint32_t level, uint32_t logtype, const char *format, va_list ap) + __attribute__((format(printf,3,0))); + +/** + * Generates a log message. + * + * The RTE_LOG() is equivalent to rte_log() with two differences: + + * - RTE_LOG() can be used to remove debug logs at compilation time, + * depending on RTE_LOG_LEVEL configuration option, and compilation + * optimization level. If optimization is enabled, the tests + * involving constants only are pre-computed. If compilation is done + * with -O0, these tests will be done at run time. + * - The log level and log type names are smaller, for example: + * RTE_LOG(INFO, EAL, "this is a %s", "log"); + * + * @param l + * Log level. A value between EMERG (1) and DEBUG (8). The short name is + * expanded by the macro, so it cannot be an integer value. + * @param t + * The log type, for example, EAL. The short name is expanded by the + * macro, so it cannot be an integer value. + * @param ... + * The fmt string, as in printf(3), followed by the variable arguments + * required by the format. + * @return + * - 0: Success. + * - Negative on error. + */ +#define RTE_LOG(l, t, ...) \ + (void)((RTE_LOG_ ## l <= RTE_LOG_LEVEL) ? \ + rte_log(RTE_LOG_ ## l, \ + RTE_LOGTYPE_ ## t, # t ": " __VA_ARGS__) : \ + 0) + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_LOG_H_ */ diff --git a/lib/librte_eal/common/include/rte_malloc.h b/lib/librte_eal/common/include/rte_malloc.h new file mode 100644 index 00000000..74bb78c7 --- /dev/null +++ b/lib/librte_eal/common/include/rte_malloc.h @@ -0,0 +1,342 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_MALLOC_H_ +#define _RTE_MALLOC_H_ + +/** + * @file + * RTE Malloc. This library provides methods for dynamically allocating memory + * from hugepages. + */ + +#include +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Structure to hold heap statistics obtained from rte_malloc_get_socket_stats function. + */ +struct rte_malloc_socket_stats { + size_t heap_totalsz_bytes; /**< Total bytes on heap */ + size_t heap_freesz_bytes; /**< Total free bytes on heap */ + size_t greatest_free_size; /**< Size in bytes of largest free block */ + unsigned free_count; /**< Number of free elements on heap */ + unsigned alloc_count; /**< Number of allocated elements on heap */ + size_t heap_allocsz_bytes; /**< Total allocated bytes on heap */ +}; + +/** + * This function allocates memory from the huge-page area of memory. The memory + * is not cleared. In NUMA systems, the memory allocated resides on the same + * NUMA socket as the core that calls this function. + * + * @param type + * A string identifying the type of allocated objects (useful for debug + * purposes, such as identifying the cause of a memory leak). Can be NULL. + * @param size + * Size (in bytes) to be allocated. + * @param align + * If 0, the return is a pointer that is suitably aligned for any kind of + * variable (in the same manner as malloc()). + * Otherwise, the return is a pointer that is a multiple of *align*. In + * this case, it must be a power of two. (Minimum alignment is the + * cacheline size, i.e. 64-bytes) + * @return + * - NULL on error. Not enough memory, or invalid arguments (size is 0, + * align is not a power of two). + * - Otherwise, the pointer to the allocated object. + */ +void * +rte_malloc(const char *type, size_t size, unsigned align); + +/** + * Allocate zero'ed memory from the heap. + * + * Equivalent to rte_malloc() except that the memory zone is + * initialised with zeros. In NUMA systems, the memory allocated resides on the + * same NUMA socket as the core that calls this function. + * + * @param type + * A string identifying the type of allocated objects (useful for debug + * purposes, such as identifying the cause of a memory leak). Can be NULL. + * @param size + * Size (in bytes) to be allocated. + * @param align + * If 0, the return is a pointer that is suitably aligned for any kind of + * variable (in the same manner as malloc()). + * Otherwise, the return is a pointer that is a multiple of *align*. In + * this case, it must obviously be a power of two. (Minimum alignment is the + * cacheline size, i.e. 64-bytes) + * @return + * - NULL on error. Not enough memory, or invalid arguments (size is 0, + * align is not a power of two). + * - Otherwise, the pointer to the allocated object. + */ +void * +rte_zmalloc(const char *type, size_t size, unsigned align); + +/** + * Replacement function for calloc(), using huge-page memory. Memory area is + * initialised with zeros. In NUMA systems, the memory allocated resides on the + * same NUMA socket as the core that calls this function. + * + * @param type + * A string identifying the type of allocated objects (useful for debug + * purposes, such as identifying the cause of a memory leak). Can be NULL. + * @param num + * Number of elements to be allocated. + * @param size + * Size (in bytes) of a single element. + * @param align + * If 0, the return is a pointer that is suitably aligned for any kind of + * variable (in the same manner as malloc()). + * Otherwise, the return is a pointer that is a multiple of *align*. In + * this case, it must obviously be a power of two. (Minimum alignment is the + * cacheline size, i.e. 64-bytes) + * @return + * - NULL on error. Not enough memory, or invalid arguments (size is 0, + * align is not a power of two). + * - Otherwise, the pointer to the allocated object. + */ +void * +rte_calloc(const char *type, size_t num, size_t size, unsigned align); + +/** + * Replacement function for realloc(), using huge-page memory. Reserved area + * memory is resized, preserving contents. In NUMA systems, the new area + * resides on the same NUMA socket as the old area. + * + * @param ptr + * Pointer to already allocated memory + * @param size + * Size (in bytes) of new area. If this is 0, memory is freed. + * @param align + * If 0, the return is a pointer that is suitably aligned for any kind of + * variable (in the same manner as malloc()). + * Otherwise, the return is a pointer that is a multiple of *align*. In + * this case, it must obviously be a power of two. (Minimum alignment is the + * cacheline size, i.e. 64-bytes) + * @return + * - NULL on error. Not enough memory, or invalid arguments (size is 0, + * align is not a power of two). + * - Otherwise, the pointer to the reallocated memory. + */ +void * +rte_realloc(void *ptr, size_t size, unsigned align); + +/** + * This function allocates memory from the huge-page area of memory. The memory + * is not cleared. + * + * @param type + * A string identifying the type of allocated objects (useful for debug + * purposes, such as identifying the cause of a memory leak). Can be NULL. + * @param size + * Size (in bytes) to be allocated. + * @param align + * If 0, the return is a pointer that is suitably aligned for any kind of + * variable (in the same manner as malloc()). + * Otherwise, the return is a pointer that is a multiple of *align*. In + * this case, it must be a power of two. (Minimum alignment is the + * cacheline size, i.e. 64-bytes) + * @param socket + * NUMA socket to allocate memory on. If SOCKET_ID_ANY is used, this function + * will behave the same as rte_malloc(). + * @return + * - NULL on error. Not enough memory, or invalid arguments (size is 0, + * align is not a power of two). + * - Otherwise, the pointer to the allocated object. + */ +void * +rte_malloc_socket(const char *type, size_t size, unsigned align, int socket); + +/** + * Allocate zero'ed memory from the heap. + * + * Equivalent to rte_malloc() except that the memory zone is + * initialised with zeros. + * + * @param type + * A string identifying the type of allocated objects (useful for debug + * purposes, such as identifying the cause of a memory leak). Can be NULL. + * @param size + * Size (in bytes) to be allocated. + * @param align + * If 0, the return is a pointer that is suitably aligned for any kind of + * variable (in the same manner as malloc()). + * Otherwise, the return is a pointer that is a multiple of *align*. In + * this case, it must obviously be a power of two. (Minimum alignment is the + * cacheline size, i.e. 64-bytes) + * @param socket + * NUMA socket to allocate memory on. If SOCKET_ID_ANY is used, this function + * will behave the same as rte_zmalloc(). + * @return + * - NULL on error. Not enough memory, or invalid arguments (size is 0, + * align is not a power of two). + * - Otherwise, the pointer to the allocated object. + */ +void * +rte_zmalloc_socket(const char *type, size_t size, unsigned align, int socket); + +/** + * Replacement function for calloc(), using huge-page memory. Memory area is + * initialised with zeros. + * + * @param type + * A string identifying the type of allocated objects (useful for debug + * purposes, such as identifying the cause of a memory leak). Can be NULL. + * @param num + * Number of elements to be allocated. + * @param size + * Size (in bytes) of a single element. + * @param align + * If 0, the return is a pointer that is suitably aligned for any kind of + * variable (in the same manner as malloc()). + * Otherwise, the return is a pointer that is a multiple of *align*. In + * this case, it must obviously be a power of two. (Minimum alignment is the + * cacheline size, i.e. 64-bytes) + * @param socket + * NUMA socket to allocate memory on. If SOCKET_ID_ANY is used, this function + * will behave the same as rte_calloc(). + * @return + * - NULL on error. Not enough memory, or invalid arguments (size is 0, + * align is not a power of two). + * - Otherwise, the pointer to the allocated object. + */ +void * +rte_calloc_socket(const char *type, size_t num, size_t size, unsigned align, int socket); + +/** + * Frees the memory space pointed to by the provided pointer. + * + * This pointer must have been returned by a previous call to + * rte_malloc(), rte_zmalloc(), rte_calloc() or rte_realloc(). The behaviour of + * rte_free() is undefined if the pointer does not match this requirement. + * + * If the pointer is NULL, the function does nothing. + * + * @param ptr + * The pointer to memory to be freed. + */ +void +rte_free(void *ptr); + +/** + * If malloc debug is enabled, check a memory block for header + * and trailer markers to indicate that all is well with the block. + * If size is non-null, also return the size of the block. + * + * @param ptr + * pointer to the start of a data block, must have been returned + * by a previous call to rte_malloc(), rte_zmalloc(), rte_calloc() + * or rte_realloc() + * @param size + * if non-null, and memory block pointer is valid, returns the size + * of the memory block + * @return + * -1 on error, invalid pointer passed or header and trailer markers + * are missing or corrupted + * 0 on success + */ +int +rte_malloc_validate(const void *ptr, size_t *size); + +/** + * Get heap statistics for the specified heap. + * + * @param socket + * An unsigned integer specifying the socket to get heap statistics for + * @param socket_stats + * A structure which provides memory to store statistics + * @return + * Null on error + * Pointer to structure storing statistics on success + */ +int +rte_malloc_get_socket_stats(int socket, + struct rte_malloc_socket_stats *socket_stats); + +/** + * Dump statistics. + * + * Dump for the specified type to the console. If the type argument is + * NULL, all memory types will be dumped. + * + * @param f + * A pointer to a file for output + * @param type + * A string identifying the type of objects to dump, or NULL + * to dump all objects. + */ +void +rte_malloc_dump_stats(FILE *f, const char *type); + +/** + * Set the maximum amount of allocated memory for this type. + * + * This is not yet implemented + * + * @param type + * A string identifying the type of allocated objects. + * @param max + * The maximum amount of allocated bytes for this type. + * @return + * - 0: Success. + * - (-1): Error. + */ +int +rte_malloc_set_limit(const char *type, size_t max); + +/** + * Return the physical address of a virtual address obtained through + * rte_malloc + * + * @param addr + * Adress obtained from a previous rte_malloc call + * @return + * NULL on error + * otherwise return physical address of the buffer + */ +phys_addr_t +rte_malloc_virt2phy(const void *addr); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_MALLOC_H_ */ diff --git a/lib/librte_eal/common/include/rte_malloc_heap.h b/lib/librte_eal/common/include/rte_malloc_heap.h new file mode 100644 index 00000000..b2703562 --- /dev/null +++ b/lib/librte_eal/common/include/rte_malloc_heap.h @@ -0,0 +1,55 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_MALLOC_HEAP_H_ +#define _RTE_MALLOC_HEAP_H_ + +#include +#include +#include +#include + +/* Number of free lists per heap, grouped by size. */ +#define RTE_HEAP_NUM_FREELISTS 13 + +/** + * Structure to hold malloc heap + */ +struct malloc_heap { + rte_spinlock_t lock; + LIST_HEAD(, malloc_elem) free_head[RTE_HEAP_NUM_FREELISTS]; + unsigned alloc_count; + size_t total_size; +} __rte_cache_aligned; + +#endif /* _RTE_MALLOC_HEAP_H_ */ diff --git a/lib/librte_eal/common/include/rte_memory.h b/lib/librte_eal/common/include/rte_memory.h new file mode 100644 index 00000000..f8dbece0 --- /dev/null +++ b/lib/librte_eal/common/include/rte_memory.h @@ -0,0 +1,263 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_MEMORY_H_ +#define _RTE_MEMORY_H_ + +/** + * @file + * + * Memory-related RTE API. + */ + +#include +#include +#include + +#ifdef RTE_EXEC_ENV_LINUXAPP +#include +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +enum rte_page_sizes { + RTE_PGSIZE_4K = 1ULL << 12, + RTE_PGSIZE_64K = 1ULL << 16, + RTE_PGSIZE_256K = 1ULL << 18, + RTE_PGSIZE_2M = 1ULL << 21, + RTE_PGSIZE_16M = 1ULL << 24, + RTE_PGSIZE_256M = 1ULL << 28, + RTE_PGSIZE_512M = 1ULL << 29, + RTE_PGSIZE_1G = 1ULL << 30, + RTE_PGSIZE_4G = 1ULL << 32, + RTE_PGSIZE_16G = 1ULL << 34, +}; + +#define SOCKET_ID_ANY -1 /**< Any NUMA socket. */ +#define RTE_CACHE_LINE_MASK (RTE_CACHE_LINE_SIZE-1) /**< Cache line mask. */ + +#define RTE_CACHE_LINE_ROUNDUP(size) \ + (RTE_CACHE_LINE_SIZE * ((size + RTE_CACHE_LINE_SIZE - 1) / RTE_CACHE_LINE_SIZE)) +/**< Return the first cache-aligned value greater or equal to size. */ + +/**< Cache line size in terms of log2 */ +#if RTE_CACHE_LINE_SIZE == 64 +#define RTE_CACHE_LINE_SIZE_LOG2 6 +#elif RTE_CACHE_LINE_SIZE == 128 +#define RTE_CACHE_LINE_SIZE_LOG2 7 +#else +#error "Unsupported cache line size" +#endif + +#define RTE_CACHE_LINE_MIN_SIZE 64 /**< Minimum Cache line size. */ + +/** + * Force alignment to cache line. + */ +#define __rte_cache_aligned __rte_aligned(RTE_CACHE_LINE_SIZE) + +/** + * Force minimum cache line alignment. + */ +#define __rte_cache_min_aligned __rte_aligned(RTE_CACHE_LINE_MIN_SIZE) + +typedef uint64_t phys_addr_t; /**< Physical address definition. */ +#define RTE_BAD_PHYS_ADDR ((phys_addr_t)-1) + +/** + * Physical memory segment descriptor. + */ +struct rte_memseg { + phys_addr_t phys_addr; /**< Start physical address. */ + union { + void *addr; /**< Start virtual address. */ + uint64_t addr_64; /**< Makes sure addr is always 64 bits */ + }; +#ifdef RTE_LIBRTE_IVSHMEM + phys_addr_t ioremap_addr; /**< Real physical address inside the VM */ +#endif + size_t len; /**< Length of the segment. */ + uint64_t hugepage_sz; /**< The pagesize of underlying memory */ + int32_t socket_id; /**< NUMA socket ID. */ + uint32_t nchannel; /**< Number of channels. */ + uint32_t nrank; /**< Number of ranks. */ +#ifdef RTE_LIBRTE_XEN_DOM0 + /**< store segment MFNs */ + uint64_t mfn[DOM0_NUM_MEMBLOCK]; +#endif +} __rte_packed; + +/** + * Lock page in physical memory and prevent from swapping. + * + * @param virt + * The virtual address. + * @return + * 0 on success, negative on error. + */ +int rte_mem_lock_page(const void *virt); + +/** + * Get physical address of any mapped virtual address in the current process. + * It is found by browsing the /proc/self/pagemap special file. + * The page must be locked. + * + * @param virt + * The virtual address. + * @return + * The physical address or RTE_BAD_PHYS_ADDR on error. + */ +phys_addr_t rte_mem_virt2phy(const void *virt); + +/** + * Get the layout of the available physical memory. + * + * It can be useful for an application to have the full physical + * memory layout to decide the size of a memory zone to reserve. This + * table is stored in rte_config (see rte_eal_get_configuration()). + * + * @return + * - On success, return a pointer to a read-only table of struct + * rte_physmem_desc elements, containing the layout of all + * addressable physical memory. The last element of the table + * contains a NULL address. + * - On error, return NULL. This should not happen since it is a fatal + * error that will probably cause the entire system to panic. + */ +const struct rte_memseg *rte_eal_get_physmem_layout(void); + +/** + * Dump the physical memory layout to the console. + * + * @param f + * A pointer to a file for output + */ +void rte_dump_physmem_layout(FILE *f); + +/** + * Get the total amount of available physical memory. + * + * @return + * The total amount of available physical memory in bytes. + */ +uint64_t rte_eal_get_physmem_size(void); + +/** + * Get the number of memory channels. + * + * @return + * The number of memory channels on the system. The value is 0 if unknown + * or not the same on all devices. + */ +unsigned rte_memory_get_nchannel(void); + +/** + * Get the number of memory ranks. + * + * @return + * The number of memory ranks on the system. The value is 0 if unknown or + * not the same on all devices. + */ +unsigned rte_memory_get_nrank(void); + +#ifdef RTE_LIBRTE_XEN_DOM0 + +/**< Internal use only - should DOM0 memory mapping be used */ +int rte_xen_dom0_supported(void); + +/**< Internal use only - phys to virt mapping for xen */ +phys_addr_t rte_xen_mem_phy2mch(uint32_t, const phys_addr_t); + +/** + * Return the physical address of elt, which is an element of the pool mp. + * + * @param memseg_id + * The mempool is from which memory segment. + * @param phy_addr + * physical address of elt. + * + * @return + * The physical address or error. + */ +static inline phys_addr_t +rte_mem_phy2mch(uint32_t memseg_id, const phys_addr_t phy_addr) +{ + if (rte_xen_dom0_supported()) + return rte_xen_mem_phy2mch(memseg_id, phy_addr); + else + return phy_addr; +} + +/** + * Memory init for supporting application running on Xen domain0. + * + * @param void + * + * @return + * 0: successfully + * negative: error + */ +int rte_xen_dom0_memory_init(void); + +/** + * Attach to memory setments of primary process on Xen domain0. + * + * @param void + * + * @return + * 0: successfully + * negative: error + */ +int rte_xen_dom0_memory_attach(void); +#else +static inline int rte_xen_dom0_supported(void) +{ + return 0; +} + +static inline phys_addr_t +rte_mem_phy2mch(uint32_t memseg_id __rte_unused, const phys_addr_t phy_addr) +{ + return phy_addr; +} +#endif + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_MEMORY_H_ */ diff --git a/lib/librte_eal/common/include/rte_memzone.h b/lib/librte_eal/common/include/rte_memzone.h new file mode 100644 index 00000000..f69b5a87 --- /dev/null +++ b/lib/librte_eal/common/include/rte_memzone.h @@ -0,0 +1,305 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_MEMZONE_H_ +#define _RTE_MEMZONE_H_ + +/** + * @file + * RTE Memzone + * + * The goal of the memzone allocator is to reserve contiguous + * portions of physical memory. These zones are identified by a name. + * + * The memzone descriptors are shared by all partitions and are + * located in a known place of physical memory. This zone is accessed + * using rte_eal_get_configuration(). The lookup (by name) of a + * memory zone can be done in any partition and returns the same + * physical address. + * + * A reserved memory zone cannot be unreserved. The reservation shall + * be done at initialization time only. + */ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define RTE_MEMZONE_2MB 0x00000001 /**< Use 2MB pages. */ +#define RTE_MEMZONE_1GB 0x00000002 /**< Use 1GB pages. */ +#define RTE_MEMZONE_16MB 0x00000100 /**< Use 16MB pages. */ +#define RTE_MEMZONE_16GB 0x00000200 /**< Use 16GB pages. */ +#define RTE_MEMZONE_256KB 0x00010000 /**< Use 256KB pages. */ +#define RTE_MEMZONE_256MB 0x00020000 /**< Use 256MB pages. */ +#define RTE_MEMZONE_512MB 0x00040000 /**< Use 512MB pages. */ +#define RTE_MEMZONE_4GB 0x00080000 /**< Use 4GB pages. */ +#define RTE_MEMZONE_SIZE_HINT_ONLY 0x00000004 /**< Use available page size */ + +/** + * A structure describing a memzone, which is a contiguous portion of + * physical memory identified by a name. + */ +struct rte_memzone { + +#define RTE_MEMZONE_NAMESIZE 32 /**< Maximum length of memory zone name.*/ + char name[RTE_MEMZONE_NAMESIZE]; /**< Name of the memory zone. */ + + phys_addr_t phys_addr; /**< Start physical address. */ + union { + void *addr; /**< Start virtual address. */ + uint64_t addr_64; /**< Makes sure addr is always 64-bits */ + }; +#ifdef RTE_LIBRTE_IVSHMEM + phys_addr_t ioremap_addr; /**< Real physical address inside the VM */ +#endif + size_t len; /**< Length of the memzone. */ + + uint64_t hugepage_sz; /**< The page size of underlying memory */ + + int32_t socket_id; /**< NUMA socket ID. */ + + uint32_t flags; /**< Characteristics of this memzone. */ + uint32_t memseg_id; /**< Memseg it belongs. */ +} __attribute__((__packed__)); + +/** + * Reserve a portion of physical memory. + * + * This function reserves some memory and returns a pointer to a + * correctly filled memzone descriptor. If the allocation cannot be + * done, return NULL. + * + * @param name + * The name of the memzone. If it already exists, the function will + * fail and return NULL. + * @param len + * The size of the memory to be reserved. If it + * is 0, the biggest contiguous zone will be reserved. + * @param socket_id + * The socket identifier in the case of + * NUMA. The value can be SOCKET_ID_ANY if there is no NUMA + * constraint for the reserved zone. + * @param flags + * The flags parameter is used to request memzones to be + * taken from specifically sized hugepages. + * - RTE_MEMZONE_2MB - Reserved from 2MB pages + * - RTE_MEMZONE_1GB - Reserved from 1GB pages + * - RTE_MEMZONE_16MB - Reserved from 16MB pages + * - RTE_MEMZONE_16GB - Reserved from 16GB pages + * - RTE_MEMZONE_256KB - Reserved from 256KB pages + * - RTE_MEMZONE_256MB - Reserved from 256MB pages + * - RTE_MEMZONE_512MB - Reserved from 512MB pages + * - RTE_MEMZONE_4GB - Reserved from 4GB pages + * - RTE_MEMZONE_SIZE_HINT_ONLY - Allow alternative page size to be used if + * the requested page size is unavailable. + * If this flag is not set, the function + * will return error on an unavailable size + * request. + * @return + * A pointer to a correctly-filled read-only memzone descriptor, or NULL + * on error. + * On error case, rte_errno will be set appropriately: + * - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure + * - E_RTE_SECONDARY - function was called from a secondary process instance + * - ENOSPC - the maximum number of memzones has already been allocated + * - EEXIST - a memzone with the same name already exists + * - ENOMEM - no appropriate memory area found in which to create memzone + * - EINVAL - invalid parameters + */ +const struct rte_memzone *rte_memzone_reserve(const char *name, + size_t len, int socket_id, + unsigned flags); + +/** + * Reserve a portion of physical memory with alignment on a specified + * boundary. + * + * This function reserves some memory with alignment on a specified + * boundary, and returns a pointer to a correctly filled memzone + * descriptor. If the allocation cannot be done or if the alignment + * is not a power of 2, returns NULL. + * + * @param name + * The name of the memzone. If it already exists, the function will + * fail and return NULL. + * @param len + * The size of the memory to be reserved. If it + * is 0, the biggest contiguous zone will be reserved. + * @param socket_id + * The socket identifier in the case of + * NUMA. The value can be SOCKET_ID_ANY if there is no NUMA + * constraint for the reserved zone. + * @param flags + * The flags parameter is used to request memzones to be + * taken from specifically sized hugepages. + * - RTE_MEMZONE_2MB - Reserved from 2MB pages + * - RTE_MEMZONE_1GB - Reserved from 1GB pages + * - RTE_MEMZONE_16MB - Reserved from 16MB pages + * - RTE_MEMZONE_16GB - Reserved from 16GB pages + * - RTE_MEMZONE_256KB - Reserved from 256KB pages + * - RTE_MEMZONE_256MB - Reserved from 256MB pages + * - RTE_MEMZONE_512MB - Reserved from 512MB pages + * - RTE_MEMZONE_4GB - Reserved from 4GB pages + * - RTE_MEMZONE_SIZE_HINT_ONLY - Allow alternative page size to be used if + * the requested page size is unavailable. + * If this flag is not set, the function + * will return error on an unavailable size + * request. + * @param align + * Alignment for resulting memzone. Must be a power of 2. + * @return + * A pointer to a correctly-filled read-only memzone descriptor, or NULL + * on error. + * On error case, rte_errno will be set appropriately: + * - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure + * - E_RTE_SECONDARY - function was called from a secondary process instance + * - ENOSPC - the maximum number of memzones has already been allocated + * - EEXIST - a memzone with the same name already exists + * - ENOMEM - no appropriate memory area found in which to create memzone + * - EINVAL - invalid parameters + */ +const struct rte_memzone *rte_memzone_reserve_aligned(const char *name, + size_t len, int socket_id, + unsigned flags, unsigned align); + +/** + * Reserve a portion of physical memory with specified alignment and + * boundary. + * + * This function reserves some memory with specified alignment and + * boundary, and returns a pointer to a correctly filled memzone + * descriptor. If the allocation cannot be done or if the alignment + * or boundary are not a power of 2, returns NULL. + * Memory buffer is reserved in a way, that it wouldn't cross specified + * boundary. That implies that requested length should be less or equal + * then boundary. + * + * @param name + * The name of the memzone. If it already exists, the function will + * fail and return NULL. + * @param len + * The size of the memory to be reserved. If it + * is 0, the biggest contiguous zone will be reserved. + * @param socket_id + * The socket identifier in the case of + * NUMA. The value can be SOCKET_ID_ANY if there is no NUMA + * constraint for the reserved zone. + * @param flags + * The flags parameter is used to request memzones to be + * taken from specifically sized hugepages. + * - RTE_MEMZONE_2MB - Reserved from 2MB pages + * - RTE_MEMZONE_1GB - Reserved from 1GB pages + * - RTE_MEMZONE_16MB - Reserved from 16MB pages + * - RTE_MEMZONE_16GB - Reserved from 16GB pages + * - RTE_MEMZONE_256KB - Reserved from 256KB pages + * - RTE_MEMZONE_256MB - Reserved from 256MB pages + * - RTE_MEMZONE_512MB - Reserved from 512MB pages + * - RTE_MEMZONE_4GB - Reserved from 4GB pages + * - RTE_MEMZONE_SIZE_HINT_ONLY - Allow alternative page size to be used if + * the requested page size is unavailable. + * If this flag is not set, the function + * will return error on an unavailable size + * request. + * @param align + * Alignment for resulting memzone. Must be a power of 2. + * @param bound + * Boundary for resulting memzone. Must be a power of 2 or zero. + * Zero value implies no boundary condition. + * @return + * A pointer to a correctly-filled read-only memzone descriptor, or NULL + * on error. + * On error case, rte_errno will be set appropriately: + * - E_RTE_NO_CONFIG - function could not get pointer to rte_config structure + * - E_RTE_SECONDARY - function was called from a secondary process instance + * - ENOSPC - the maximum number of memzones has already been allocated + * - EEXIST - a memzone with the same name already exists + * - ENOMEM - no appropriate memory area found in which to create memzone + * - EINVAL - invalid parameters + */ +const struct rte_memzone *rte_memzone_reserve_bounded(const char *name, + size_t len, int socket_id, + unsigned flags, unsigned align, unsigned bound); + +/** + * Free a memzone. + * + * Note: an IVSHMEM zone cannot be freed. + * + * @param mz + * A pointer to the memzone + * @return + * -EINVAL - invalid parameter, IVSHMEM memzone. + * 0 - success + */ +int rte_memzone_free(const struct rte_memzone *mz); + +/** + * Lookup for a memzone. + * + * Get a pointer to a descriptor of an already reserved memory + * zone identified by the name given as an argument. + * + * @param name + * The name of the memzone. + * @return + * A pointer to a read-only memzone descriptor. + */ +const struct rte_memzone *rte_memzone_lookup(const char *name); + +/** + * Dump all reserved memzones to the console. + * + * @param f + * A pointer to a file for output + */ +void rte_memzone_dump(FILE *f); + +/** + * Walk list of all memzones + * + * @param func + * Iterator function + * @param arg + * Argument passed to iterator + */ +void rte_memzone_walk(void (*func)(const struct rte_memzone *, void *arg), + void *arg); + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_MEMZONE_H_ */ diff --git a/lib/librte_eal/common/include/rte_pci.h b/lib/librte_eal/common/include/rte_pci.h new file mode 100644 index 00000000..e692094e --- /dev/null +++ b/lib/librte_eal/common/include/rte_pci.h @@ -0,0 +1,598 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/* BSD LICENSE + * + * Copyright 2013-2014 6WIND S.A. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_PCI_H_ +#define _RTE_PCI_H_ + +/** + * @file + * + * RTE PCI Interface + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include +#include +#include +#include +#include + +#include + +TAILQ_HEAD(pci_device_list, rte_pci_device); /**< PCI devices in D-linked Q. */ +TAILQ_HEAD(pci_driver_list, rte_pci_driver); /**< PCI drivers in D-linked Q. */ + +extern struct pci_driver_list pci_driver_list; /**< Global list of PCI drivers. */ +extern struct pci_device_list pci_device_list; /**< Global list of PCI devices. */ + +/** Pathname of PCI devices directory. */ +#define SYSFS_PCI_DEVICES "/sys/bus/pci/devices" + +/** Formatting string for PCI device identifier: Ex: 0000:00:01.0 */ +#define PCI_PRI_FMT "%.4" PRIx16 ":%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8 + +/** Short formatting string, without domain, for PCI device: Ex: 00:01.0 */ +#define PCI_SHORT_PRI_FMT "%.2" PRIx8 ":%.2" PRIx8 ".%" PRIx8 + +/** Nb. of values in PCI device identifier format string. */ +#define PCI_FMT_NVAL 4 + +/** Nb. of values in PCI resource format. */ +#define PCI_RESOURCE_FMT_NVAL 3 + +/** IO resource type: memory address space */ +#define IORESOURCE_MEM 0x00000200 + +/** + * A structure describing a PCI resource. + */ +struct rte_pci_resource { + uint64_t phys_addr; /**< Physical address, 0 if no resource. */ + uint64_t len; /**< Length of the resource. */ + void *addr; /**< Virtual address, NULL when not mapped. */ +}; + +/** Maximum number of PCI resources. */ +#define PCI_MAX_RESOURCE 6 + +/** + * A structure describing an ID for a PCI driver. Each driver provides a + * table of these IDs for each device that it supports. + */ +struct rte_pci_id { + uint16_t vendor_id; /**< Vendor ID or PCI_ANY_ID. */ + uint16_t device_id; /**< Device ID or PCI_ANY_ID. */ + uint16_t subsystem_vendor_id; /**< Subsystem vendor ID or PCI_ANY_ID. */ + uint16_t subsystem_device_id; /**< Subsystem device ID or PCI_ANY_ID. */ +}; + +/** + * A structure describing the location of a PCI device. + */ +struct rte_pci_addr { + uint16_t domain; /**< Device domain */ + uint8_t bus; /**< Device bus */ + uint8_t devid; /**< Device ID */ + uint8_t function; /**< Device function. */ +}; + +struct rte_devargs; + +enum rte_kernel_driver { + RTE_KDRV_UNKNOWN = 0, + RTE_KDRV_IGB_UIO, + RTE_KDRV_VFIO, + RTE_KDRV_UIO_GENERIC, + RTE_KDRV_NIC_UIO, + RTE_KDRV_NONE, +}; + +/** + * A structure describing a PCI device. + */ +struct rte_pci_device { + TAILQ_ENTRY(rte_pci_device) next; /**< Next probed PCI device. */ + struct rte_pci_addr addr; /**< PCI location. */ + struct rte_pci_id id; /**< PCI ID. */ + struct rte_pci_resource mem_resource[PCI_MAX_RESOURCE]; /**< PCI Memory Resource */ + struct rte_intr_handle intr_handle; /**< Interrupt handle */ + struct rte_pci_driver *driver; /**< Associated driver */ + uint16_t max_vfs; /**< sriov enable if not zero */ + int numa_node; /**< NUMA node connection */ + struct rte_devargs *devargs; /**< Device user arguments */ + enum rte_kernel_driver kdrv; /**< Kernel driver passthrough */ +}; + +/** Any PCI device identifier (vendor, device, ...) */ +#define PCI_ANY_ID (0xffff) + +#ifdef __cplusplus +/** C++ macro used to help building up tables of device IDs */ +#define RTE_PCI_DEVICE(vend, dev) \ + (vend), \ + (dev), \ + PCI_ANY_ID, \ + PCI_ANY_ID +#else +/** Macro used to help building up tables of device IDs */ +#define RTE_PCI_DEVICE(vend, dev) \ + .vendor_id = (vend), \ + .device_id = (dev), \ + .subsystem_vendor_id = PCI_ANY_ID, \ + .subsystem_device_id = PCI_ANY_ID +#endif + +struct rte_pci_driver; + +/** + * Initialisation function for the driver called during PCI probing. + */ +typedef int (pci_devinit_t)(struct rte_pci_driver *, struct rte_pci_device *); + +/** + * Uninitialisation function for the driver called during hotplugging. + */ +typedef int (pci_devuninit_t)(struct rte_pci_device *); + +/** + * A structure describing a PCI driver. + */ +struct rte_pci_driver { + TAILQ_ENTRY(rte_pci_driver) next; /**< Next in list. */ + const char *name; /**< Driver name. */ + pci_devinit_t *devinit; /**< Device init. function. */ + pci_devuninit_t *devuninit; /**< Device uninit function. */ + const struct rte_pci_id *id_table; /**< ID table, NULL terminated. */ + uint32_t drv_flags; /**< Flags contolling handling of device. */ +}; + +/** Device needs PCI BAR mapping (done with either IGB_UIO or VFIO) */ +#define RTE_PCI_DRV_NEED_MAPPING 0x0001 +/** Device driver must be registered several times until failure - deprecated */ +#pragma GCC poison RTE_PCI_DRV_MULTIPLE +/** Device needs to be unbound even if no module is provided */ +#define RTE_PCI_DRV_FORCE_UNBIND 0x0004 +/** Device driver supports link state interrupt */ +#define RTE_PCI_DRV_INTR_LSC 0x0008 +/** Device driver supports detaching capability */ +#define RTE_PCI_DRV_DETACHABLE 0x0010 + +/** + * A structure describing a PCI mapping. + */ +struct pci_map { + void *addr; + char *path; + uint64_t offset; + uint64_t size; + uint64_t phaddr; +}; + +/** + * A structure describing a mapped PCI resource. + * For multi-process we need to reproduce all PCI mappings in secondary + * processes, so save them in a tailq. + */ +struct mapped_pci_resource { + TAILQ_ENTRY(mapped_pci_resource) next; + + struct rte_pci_addr pci_addr; + char path[PATH_MAX]; + int nb_maps; + struct pci_map maps[PCI_MAX_RESOURCE]; +}; + +/** mapped pci device list */ +TAILQ_HEAD(mapped_pci_res_list, mapped_pci_resource); + +/**< Internal use only - Macro used by pci addr parsing functions **/ +#define GET_PCIADDR_FIELD(in, fd, lim, dlm) \ +do { \ + unsigned long val; \ + char *end; \ + errno = 0; \ + val = strtoul((in), &end, 16); \ + if (errno != 0 || end[0] != (dlm) || val > (lim)) \ + return -EINVAL; \ + (fd) = (typeof (fd))val; \ + (in) = end + 1; \ +} while(0) + +/** + * Utility function to produce a PCI Bus-Device-Function value + * given a string representation. Assumes that the BDF is provided without + * a domain prefix (i.e. domain returned is always 0) + * + * @param input + * The input string to be parsed. Should have the format XX:XX.X + * @param dev_addr + * The PCI Bus-Device-Function address to be returned. Domain will always be + * returned as 0 + * @return + * 0 on success, negative on error. + */ +static inline int +eal_parse_pci_BDF(const char *input, struct rte_pci_addr *dev_addr) +{ + dev_addr->domain = 0; + GET_PCIADDR_FIELD(input, dev_addr->bus, UINT8_MAX, ':'); + GET_PCIADDR_FIELD(input, dev_addr->devid, UINT8_MAX, '.'); + GET_PCIADDR_FIELD(input, dev_addr->function, UINT8_MAX, 0); + return 0; +} + +/** + * Utility function to produce a PCI Bus-Device-Function value + * given a string representation. Assumes that the BDF is provided including + * a domain prefix. + * + * @param input + * The input string to be parsed. Should have the format XXXX:XX:XX.X + * @param dev_addr + * The PCI Bus-Device-Function address to be returned + * @return + * 0 on success, negative on error. + */ +static inline int +eal_parse_pci_DomBDF(const char *input, struct rte_pci_addr *dev_addr) +{ + GET_PCIADDR_FIELD(input, dev_addr->domain, UINT16_MAX, ':'); + GET_PCIADDR_FIELD(input, dev_addr->bus, UINT8_MAX, ':'); + GET_PCIADDR_FIELD(input, dev_addr->devid, UINT8_MAX, '.'); + GET_PCIADDR_FIELD(input, dev_addr->function, UINT8_MAX, 0); + return 0; +} +#undef GET_PCIADDR_FIELD + +/* Compare two PCI device addresses. */ +/** + * Utility function to compare two PCI device addresses. + * + * @param addr + * The PCI Bus-Device-Function address to compare + * @param addr2 + * The PCI Bus-Device-Function address to compare + * @return + * 0 on equal PCI address. + * Positive on addr is greater than addr2. + * Negative on addr is less than addr2, or error. + */ +static inline int +rte_eal_compare_pci_addr(const struct rte_pci_addr *addr, + const struct rte_pci_addr *addr2) +{ + uint64_t dev_addr, dev_addr2; + + if ((addr == NULL) || (addr2 == NULL)) + return -1; + + dev_addr = (addr->domain << 24) | (addr->bus << 16) | + (addr->devid << 8) | addr->function; + dev_addr2 = (addr2->domain << 24) | (addr2->bus << 16) | + (addr2->devid << 8) | addr2->function; + + if (dev_addr > dev_addr2) + return 1; + else if (dev_addr < dev_addr2) + return -1; + else + return 0; +} + +/** + * Scan the content of the PCI bus, and the devices in the devices + * list + * + * @return + * 0 on success, negative on error + */ +int rte_eal_pci_scan(void); + +/** + * Probe the PCI bus for registered drivers. + * + * Scan the content of the PCI bus, and call the probe() function for + * all registered drivers that have a matching entry in its id_table + * for discovered devices. + * + * @return + * - 0 on success. + * - Negative on error. + */ +int rte_eal_pci_probe(void); + +/** + * Map the PCI device resources in user space virtual memory address + * + * Note that driver should not call this function when flag + * RTE_PCI_DRV_NEED_MAPPING is set, as EAL will do that for + * you when it's on. + * + * @param dev + * A pointer to a rte_pci_device structure describing the device + * to use + * + * @return + * 0 on success, negative on error and positive if no driver + * is found for the device. + */ +int rte_eal_pci_map_device(struct rte_pci_device *dev); + +/** + * Unmap this device + * + * @param dev + * A pointer to a rte_pci_device structure describing the device + * to use + */ +void rte_eal_pci_unmap_device(struct rte_pci_device *dev); + +/** + * @internal + * Map a particular resource from a file. + * + * @param requested_addr + * The starting address for the new mapping range. + * @param fd + * The file descriptor. + * @param offset + * The offset for the mapping range. + * @param size + * The size for the mapping range. + * @param additional_flags + * The additional flags for the mapping range. + * @return + * - On success, the function returns a pointer to the mapped area. + * - On error, the value MAP_FAILED is returned. + */ +void *pci_map_resource(void *requested_addr, int fd, off_t offset, + size_t size, int additional_flags); + +/** + * @internal + * Unmap a particular resource. + * + * @param requested_addr + * The address for the unmapping range. + * @param size + * The size for the unmapping range. + */ +void pci_unmap_resource(void *requested_addr, size_t size); + +/** + * Probe the single PCI device. + * + * Scan the content of the PCI bus, and find the pci device specified by pci + * address, then call the probe() function for registered driver that has a + * matching entry in its id_table for discovered device. + * + * @param addr + * The PCI Bus-Device-Function address to probe. + * @return + * - 0 on success. + * - Negative on error. + */ +int rte_eal_pci_probe_one(const struct rte_pci_addr *addr); + +/** + * Close the single PCI device. + * + * Scan the content of the PCI bus, and find the pci device specified by pci + * address, then call the devuninit() function for registered driver that has a + * matching entry in its id_table for discovered device. + * + * @param addr + * The PCI Bus-Device-Function address to close. + * @return + * - 0 on success. + * - Negative on error. + */ +int rte_eal_pci_detach(const struct rte_pci_addr *addr); + +/** + * Dump the content of the PCI bus. + * + * @param f + * A pointer to a file for output + */ +void rte_eal_pci_dump(FILE *f); + +/** + * Register a PCI driver. + * + * @param driver + * A pointer to a rte_pci_driver structure describing the driver + * to be registered. + */ +void rte_eal_pci_register(struct rte_pci_driver *driver); + +/** + * Unregister a PCI driver. + * + * @param driver + * A pointer to a rte_pci_driver structure describing the driver + * to be unregistered. + */ +void rte_eal_pci_unregister(struct rte_pci_driver *driver); + +/** + * Read PCI config space. + * + * @param device + * A pointer to a rte_pci_device structure describing the device + * to use + * @param buf + * A data buffer where the bytes should be read into + * @param len + * The length of the data buffer. + * @param offset + * The offset into PCI config space + */ +int rte_eal_pci_read_config(const struct rte_pci_device *device, + void *buf, size_t len, off_t offset); + +/** + * Write PCI config space. + * + * @param device + * A pointer to a rte_pci_device structure describing the device + * to use + * @param buf + * A data buffer containing the bytes should be written + * @param len + * The length of the data buffer. + * @param offset + * The offset into PCI config space + */ +int rte_eal_pci_write_config(const struct rte_pci_device *device, + const void *buf, size_t len, off_t offset); + +/** + * A structure used to access io resources for a pci device. + * rte_pci_ioport is arch, os, driver specific, and should not be used outside + * of pci ioport api. + */ +struct rte_pci_ioport { + struct rte_pci_device *dev; + uint64_t base; +}; + +/** + * Initialises a rte_pci_ioport object for a pci device io resource. + * This object is then used to gain access to those io resources (see below). + * + * @param dev + * A pointer to a rte_pci_device structure describing the device. + * to use + * @param bar + * Index of the io pci resource we want to access. + * @param p + * The rte_pci_ioport object to be initialized. + * @return + * 0 on success, negative on error. + */ +int rte_eal_pci_ioport_map(struct rte_pci_device *dev, int bar, + struct rte_pci_ioport *p); + +/** + * Release any resources used in a rte_pci_ioport object. + * + * @param p + * The rte_pci_ioport object to be uninitialized. + */ +int rte_eal_pci_ioport_unmap(struct rte_pci_ioport *p); + +/** + * Read from a io pci resource. + * + * @param p + * The rte_pci_ioport object from which we want to read. + * @param data + * A data buffer where the bytes should be read into + * @param len + * The length of the data buffer. + * @param offset + * The offset into the pci io resource. + */ +void rte_eal_pci_ioport_read(struct rte_pci_ioport *p, + void *data, size_t len, off_t offset); + +/** + * Write to a io pci resource. + * + * @param p + * The rte_pci_ioport object to which we want to write. + * @param data + * A data buffer where the bytes should be read into + * @param len + * The length of the data buffer. + * @param offset + * The offset into the pci io resource. + */ +void rte_eal_pci_ioport_write(struct rte_pci_ioport *p, + const void *data, size_t len, off_t offset); + +#ifdef RTE_PCI_CONFIG +#include +/** + * Set special config space registers for performance purpose. + * It is deprecated, as all configurations have been moved into + * each PMDs respectively. + * + * @param dev + * A pointer to a rte_pci_device structure describing the device + * to use + */ +void pci_config_space_set(struct rte_pci_device *dev) __rte_deprecated; +#endif /* RTE_PCI_CONFIG */ + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_PCI_H_ */ diff --git a/lib/librte_eal/common/include/rte_pci_dev_feature_defs.h b/lib/librte_eal/common/include/rte_pci_dev_feature_defs.h new file mode 100644 index 00000000..08222510 --- /dev/null +++ b/lib/librte_eal/common/include/rte_pci_dev_feature_defs.h @@ -0,0 +1,70 @@ +/*- + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * The full GNU General Public License is included in this distribution + * in the file called LICENSE.GPL. + * + * Contact Information: + * Intel Corporation + * + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_PCI_DEV_DEFS_H_ +#define _RTE_PCI_DEV_DEFS_H_ + +/* interrupt mode */ +enum rte_intr_mode { + RTE_INTR_MODE_NONE = 0, + RTE_INTR_MODE_LEGACY, + RTE_INTR_MODE_MSI, + RTE_INTR_MODE_MSIX +}; + +#endif /* _RTE_PCI_DEV_DEFS_H_ */ diff --git a/lib/librte_eal/common/include/rte_pci_dev_features.h b/lib/librte_eal/common/include/rte_pci_dev_features.h new file mode 100644 index 00000000..67b986a6 --- /dev/null +++ b/lib/librte_eal/common/include/rte_pci_dev_features.h @@ -0,0 +1,69 @@ +/*- + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * The full GNU General Public License is included in this distribution + * in the file called LICENSE.GPL. + * + * Contact Information: + * Intel Corporation + * + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_PCI_DEV_FEATURES_H +#define _RTE_PCI_DEV_FEATURES_H + +#include + +#define RTE_INTR_MODE_NONE_NAME "none" +#define RTE_INTR_MODE_LEGACY_NAME "legacy" +#define RTE_INTR_MODE_MSI_NAME "msi" +#define RTE_INTR_MODE_MSIX_NAME "msix" + +#endif diff --git a/lib/librte_eal/common/include/rte_pci_dev_ids.h b/lib/librte_eal/common/include/rte_pci_dev_ids.h new file mode 100644 index 00000000..cf7b5487 --- /dev/null +++ b/lib/librte_eal/common/include/rte_pci_dev_ids.h @@ -0,0 +1,704 @@ +/*- + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2010-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * The full GNU General Public License is included in this distribution + * in the file called LICENSE.GPL. + * + * Contact Information: + * Intel Corporation + * + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +/** + * @file + * + * This file contains a list of the PCI device IDs recognised by DPDK, which + * can be used to fill out an array of structures describing the devices. + * + * Currently four families of devices are recognised: those supported by the + * IGB driver, by EM driver, those supported by the IXGBE driver, and by virtio + * driver which is a para virtualization driver running in guest virtual machine. + * The inclusion of these in an array built using this file depends on the + * definition of + * RTE_PCI_DEV_ID_DECL_EM + * RTE_PCI_DEV_ID_DECL_IGB + * RTE_PCI_DEV_ID_DECL_IGBVF + * RTE_PCI_DEV_ID_DECL_IXGBE + * RTE_PCI_DEV_ID_DECL_IXGBEVF + * RTE_PCI_DEV_ID_DECL_I40E + * RTE_PCI_DEV_ID_DECL_I40EVF + * RTE_PCI_DEV_ID_DECL_VIRTIO + * at the time when this file is included. + * + * In order to populate an array, the user of this file must define this macro: + * RTE_PCI_DEV_ID_DECL_IXGBE(vendorID, deviceID). For example: + * + * @code + * struct device { + * int vend; + * int dev; + * }; + * + * struct device devices[] = { + * #define RTE_PCI_DEV_ID_DECL_IXGBE(vendorID, deviceID) {vend, dev}, + * #include + * }; + * @endcode + * + * Note that this file can be included multiple times within the same file. + */ + +#ifndef RTE_PCI_DEV_ID_DECL_EM +#define RTE_PCI_DEV_ID_DECL_EM(vend, dev) +#endif + +#ifndef RTE_PCI_DEV_ID_DECL_IGB +#define RTE_PCI_DEV_ID_DECL_IGB(vend, dev) +#endif + +#ifndef RTE_PCI_DEV_ID_DECL_IGBVF +#define RTE_PCI_DEV_ID_DECL_IGBVF(vend, dev) +#endif + +#ifndef RTE_PCI_DEV_ID_DECL_IXGBE +#define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev) +#endif + +#ifndef RTE_PCI_DEV_ID_DECL_IXGBEVF +#define RTE_PCI_DEV_ID_DECL_IXGBEVF(vend, dev) +#endif + +#ifndef RTE_PCI_DEV_ID_DECL_I40E +#define RTE_PCI_DEV_ID_DECL_I40E(vend, dev) +#endif + +#ifndef RTE_PCI_DEV_ID_DECL_I40EVF +#define RTE_PCI_DEV_ID_DECL_I40EVF(vend, dev) +#endif + +#ifndef RTE_PCI_DEV_ID_DECL_VIRTIO +#define RTE_PCI_DEV_ID_DECL_VIRTIO(vend, dev) +#endif + +#ifndef RTE_PCI_DEV_ID_DECL_VMXNET3 +#define RTE_PCI_DEV_ID_DECL_VMXNET3(vend, dev) +#endif + +#ifndef RTE_PCI_DEV_ID_DECL_FM10K +#define RTE_PCI_DEV_ID_DECL_FM10K(vend, dev) +#endif + +#ifndef RTE_PCI_DEV_ID_DECL_FM10KVF +#define RTE_PCI_DEV_ID_DECL_FM10KVF(vend, dev) +#endif + +#ifndef RTE_PCI_DEV_ID_DECL_ENIC +#define RTE_PCI_DEV_ID_DECL_ENIC(vend, dev) +#endif + +#ifndef RTE_PCI_DEV_ID_DECL_BNX2X +#define RTE_PCI_DEV_ID_DECL_BNX2X(vend, dev) +#endif + +#ifndef RTE_PCI_DEV_ID_DECL_BNX2XVF +#define RTE_PCI_DEV_ID_DECL_BNX2XVF(vend, dev) +#endif + +#ifndef PCI_VENDOR_ID_INTEL +/** Vendor ID used by Intel devices */ +#define PCI_VENDOR_ID_INTEL 0x8086 +#endif + +#ifndef PCI_VENDOR_ID_QUMRANET +/** Vendor ID used by virtio devices */ +#define PCI_VENDOR_ID_QUMRANET 0x1AF4 +#endif + +#ifndef PCI_VENDOR_ID_VMWARE +/** Vendor ID used by VMware devices */ +#define PCI_VENDOR_ID_VMWARE 0x15AD +#endif + +#ifndef PCI_VENDOR_ID_CISCO +/** Vendor ID used by Cisco VIC devices */ +#define PCI_VENDOR_ID_CISCO 0x1137 +#endif + +#ifndef PCI_VENDOR_ID_BROADCOM +/** Vendor ID used by Broadcom devices */ +#define PCI_VENDOR_ID_BROADCOM 0x14E4 +#endif + +/******************** Physical EM devices from e1000_hw.h ********************/ + +#define E1000_DEV_ID_82542 0x1000 +#define E1000_DEV_ID_82543GC_FIBER 0x1001 +#define E1000_DEV_ID_82543GC_COPPER 0x1004 +#define E1000_DEV_ID_82544EI_COPPER 0x1008 +#define E1000_DEV_ID_82544EI_FIBER 0x1009 +#define E1000_DEV_ID_82544GC_COPPER 0x100C +#define E1000_DEV_ID_82544GC_LOM 0x100D +#define E1000_DEV_ID_82540EM 0x100E +#define E1000_DEV_ID_82540EM_LOM 0x1015 +#define E1000_DEV_ID_82540EP_LOM 0x1016 +#define E1000_DEV_ID_82540EP 0x1017 +#define E1000_DEV_ID_82540EP_LP 0x101E +#define E1000_DEV_ID_82545EM_COPPER 0x100F +#define E1000_DEV_ID_82545EM_FIBER 0x1011 +#define E1000_DEV_ID_82545GM_COPPER 0x1026 +#define E1000_DEV_ID_82545GM_FIBER 0x1027 +#define E1000_DEV_ID_82545GM_SERDES 0x1028 +#define E1000_DEV_ID_82546EB_COPPER 0x1010 +#define E1000_DEV_ID_82546EB_FIBER 0x1012 +#define E1000_DEV_ID_82546EB_QUAD_COPPER 0x101D +#define E1000_DEV_ID_82546GB_COPPER 0x1079 +#define E1000_DEV_ID_82546GB_FIBER 0x107A +#define E1000_DEV_ID_82546GB_SERDES 0x107B +#define E1000_DEV_ID_82546GB_PCIE 0x108A +#define E1000_DEV_ID_82546GB_QUAD_COPPER 0x1099 +#define E1000_DEV_ID_82546GB_QUAD_COPPER_KSP3 0x10B5 +#define E1000_DEV_ID_82541EI 0x1013 +#define E1000_DEV_ID_82541EI_MOBILE 0x1018 +#define E1000_DEV_ID_82541ER_LOM 0x1014 +#define E1000_DEV_ID_82541ER 0x1078 +#define E1000_DEV_ID_82541GI 0x1076 +#define E1000_DEV_ID_82541GI_LF 0x107C +#define E1000_DEV_ID_82541GI_MOBILE 0x1077 +#define E1000_DEV_ID_82547EI 0x1019 +#define E1000_DEV_ID_82547EI_MOBILE 0x101A +#define E1000_DEV_ID_82547GI 0x1075 +#define E1000_DEV_ID_82571EB_COPPER 0x105E +#define E1000_DEV_ID_82571EB_FIBER 0x105F +#define E1000_DEV_ID_82571EB_SERDES 0x1060 +#define E1000_DEV_ID_82571EB_SERDES_DUAL 0x10D9 +#define E1000_DEV_ID_82571EB_SERDES_QUAD 0x10DA +#define E1000_DEV_ID_82571EB_QUAD_COPPER 0x10A4 +#define E1000_DEV_ID_82571PT_QUAD_COPPER 0x10D5 +#define E1000_DEV_ID_82571EB_QUAD_FIBER 0x10A5 +#define E1000_DEV_ID_82571EB_QUAD_COPPER_LP 0x10BC +#define E1000_DEV_ID_82572EI_COPPER 0x107D +#define E1000_DEV_ID_82572EI_FIBER 0x107E +#define E1000_DEV_ID_82572EI_SERDES 0x107F +#define E1000_DEV_ID_82572EI 0x10B9 +#define E1000_DEV_ID_82573E 0x108B +#define E1000_DEV_ID_82573E_IAMT 0x108C +#define E1000_DEV_ID_82573L 0x109A +#define E1000_DEV_ID_82574L 0x10D3 +#define E1000_DEV_ID_82574LA 0x10F6 +#define E1000_DEV_ID_82583V 0x150C +#define E1000_DEV_ID_80003ES2LAN_COPPER_DPT 0x1096 +#define E1000_DEV_ID_80003ES2LAN_SERDES_DPT 0x1098 +#define E1000_DEV_ID_80003ES2LAN_COPPER_SPT 0x10BA +#define E1000_DEV_ID_80003ES2LAN_SERDES_SPT 0x10BB +#define E1000_DEV_ID_ICH8_82567V_3 0x1501 +#define E1000_DEV_ID_ICH8_IGP_M_AMT 0x1049 +#define E1000_DEV_ID_ICH8_IGP_AMT 0x104A +#define E1000_DEV_ID_ICH8_IGP_C 0x104B +#define E1000_DEV_ID_ICH8_IFE 0x104C +#define E1000_DEV_ID_ICH8_IFE_GT 0x10C4 +#define E1000_DEV_ID_ICH8_IFE_G 0x10C5 +#define E1000_DEV_ID_ICH8_IGP_M 0x104D +#define E1000_DEV_ID_ICH9_IGP_M 0x10BF +#define E1000_DEV_ID_ICH9_IGP_M_AMT 0x10F5 +#define E1000_DEV_ID_ICH9_IGP_M_V 0x10CB +#define E1000_DEV_ID_ICH9_IGP_AMT 0x10BD +#define E1000_DEV_ID_ICH9_BM 0x10E5 +#define E1000_DEV_ID_ICH9_IGP_C 0x294C +#define E1000_DEV_ID_ICH9_IFE 0x10C0 +#define E1000_DEV_ID_ICH9_IFE_GT 0x10C3 +#define E1000_DEV_ID_ICH9_IFE_G 0x10C2 +#define E1000_DEV_ID_ICH10_R_BM_LM 0x10CC +#define E1000_DEV_ID_ICH10_R_BM_LF 0x10CD +#define E1000_DEV_ID_ICH10_R_BM_V 0x10CE +#define E1000_DEV_ID_ICH10_D_BM_LM 0x10DE +#define E1000_DEV_ID_ICH10_D_BM_LF 0x10DF +#define E1000_DEV_ID_ICH10_D_BM_V 0x1525 + +#define E1000_DEV_ID_PCH_M_HV_LM 0x10EA +#define E1000_DEV_ID_PCH_M_HV_LC 0x10EB +#define E1000_DEV_ID_PCH_D_HV_DM 0x10EF +#define E1000_DEV_ID_PCH_D_HV_DC 0x10F0 +#define E1000_DEV_ID_PCH2_LV_LM 0x1502 +#define E1000_DEV_ID_PCH2_LV_V 0x1503 +#define E1000_DEV_ID_PCH_LPT_I217_LM 0x153A +#define E1000_DEV_ID_PCH_LPT_I217_V 0x153B +#define E1000_DEV_ID_PCH_LPTLP_I218_LM 0x155A +#define E1000_DEV_ID_PCH_LPTLP_I218_V 0x1559 +#define E1000_DEV_ID_PCH_I218_LM2 0x15A0 +#define E1000_DEV_ID_PCH_I218_V2 0x15A1 +#define E1000_DEV_ID_PCH_I218_LM3 0x15A2 +#define E1000_DEV_ID_PCH_I218_V3 0x15A3 + + +/* + * Tested (supported) on VM emulated HW. + */ + +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82540EM) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82545EM_COPPER) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82545EM_FIBER) + +/* + * Tested (supported) on real HW. + */ + +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82546EB_COPPER) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82546EB_FIBER) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82546EB_QUAD_COPPER) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82571EB_COPPER) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82571EB_FIBER) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82571EB_SERDES) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82571EB_SERDES_DUAL) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82571EB_SERDES_QUAD) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82571EB_QUAD_COPPER) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82571PT_QUAD_COPPER) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82571EB_QUAD_FIBER) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82571EB_QUAD_COPPER_LP) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82572EI_COPPER) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82572EI_FIBER) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82572EI_SERDES) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82572EI) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82573L) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82574L) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82574LA) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82583V) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_PCH_LPT_I217_LM) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_PCH_LPT_I217_V) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_PCH_LPTLP_I218_LM) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_PCH_LPTLP_I218_V) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_PCH_I218_LM2) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_PCH_I218_V2) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_PCH_I218_LM3) +RTE_PCI_DEV_ID_DECL_EM(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_PCH_I218_V3) + + +/******************** Physical IGB devices from e1000_hw.h ********************/ + +#define E1000_DEV_ID_82576 0x10C9 +#define E1000_DEV_ID_82576_FIBER 0x10E6 +#define E1000_DEV_ID_82576_SERDES 0x10E7 +#define E1000_DEV_ID_82576_QUAD_COPPER 0x10E8 +#define E1000_DEV_ID_82576_QUAD_COPPER_ET2 0x1526 +#define E1000_DEV_ID_82576_NS 0x150A +#define E1000_DEV_ID_82576_NS_SERDES 0x1518 +#define E1000_DEV_ID_82576_SERDES_QUAD 0x150D +#define E1000_DEV_ID_82575EB_COPPER 0x10A7 +#define E1000_DEV_ID_82575EB_FIBER_SERDES 0x10A9 +#define E1000_DEV_ID_82575GB_QUAD_COPPER 0x10D6 +#define E1000_DEV_ID_82580_COPPER 0x150E +#define E1000_DEV_ID_82580_FIBER 0x150F +#define E1000_DEV_ID_82580_SERDES 0x1510 +#define E1000_DEV_ID_82580_SGMII 0x1511 +#define E1000_DEV_ID_82580_COPPER_DUAL 0x1516 +#define E1000_DEV_ID_82580_QUAD_FIBER 0x1527 +#define E1000_DEV_ID_I350_COPPER 0x1521 +#define E1000_DEV_ID_I350_FIBER 0x1522 +#define E1000_DEV_ID_I350_SERDES 0x1523 +#define E1000_DEV_ID_I350_SGMII 0x1524 +#define E1000_DEV_ID_I350_DA4 0x1546 +#define E1000_DEV_ID_I210_COPPER 0x1533 +#define E1000_DEV_ID_I210_COPPER_OEM1 0x1534 +#define E1000_DEV_ID_I210_COPPER_IT 0x1535 +#define E1000_DEV_ID_I210_FIBER 0x1536 +#define E1000_DEV_ID_I210_SERDES 0x1537 +#define E1000_DEV_ID_I210_SGMII 0x1538 +#define E1000_DEV_ID_I210_COPPER_FLASHLESS 0x157B +#define E1000_DEV_ID_I210_SERDES_FLASHLESS 0x157C +#define E1000_DEV_ID_I211_COPPER 0x1539 +#define E1000_DEV_ID_I354_BACKPLANE_1GBPS 0x1F40 +#define E1000_DEV_ID_I354_SGMII 0x1F41 +#define E1000_DEV_ID_I354_BACKPLANE_2_5GBPS 0x1F45 +#define E1000_DEV_ID_DH89XXCC_SGMII 0x0438 +#define E1000_DEV_ID_DH89XXCC_SERDES 0x043A +#define E1000_DEV_ID_DH89XXCC_BACKPLANE 0x043C +#define E1000_DEV_ID_DH89XXCC_SFP 0x0440 + +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_FIBER) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_SERDES) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_QUAD_COPPER) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_NS) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_NS_SERDES) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_SERDES_QUAD) + +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82575EB_COPPER) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER) + +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_COPPER) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_FIBER) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_SERDES) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_SGMII) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_COPPER_DUAL) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82580_QUAD_FIBER) + +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_COPPER) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_FIBER) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_SERDES) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_SGMII) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_DA4) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_COPPER) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_COPPER_OEM1) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_COPPER_IT) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_FIBER) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_SERDES) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I210_SGMII) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I211_COPPER) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I354_BACKPLANE_1GBPS) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I354_SGMII) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_DH89XXCC_SGMII) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_DH89XXCC_SERDES) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE) +RTE_PCI_DEV_ID_DECL_IGB(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_DH89XXCC_SFP) + +/****************** Physical IXGBE devices from ixgbe_type.h ******************/ + +#define IXGBE_DEV_ID_82598 0x10B6 +#define IXGBE_DEV_ID_82598_BX 0x1508 +#define IXGBE_DEV_ID_82598AF_DUAL_PORT 0x10C6 +#define IXGBE_DEV_ID_82598AF_SINGLE_PORT 0x10C7 +#define IXGBE_DEV_ID_82598AT 0x10C8 +#define IXGBE_DEV_ID_82598AT2 0x150B +#define IXGBE_DEV_ID_82598EB_SFP_LOM 0x10DB +#define IXGBE_DEV_ID_82598EB_CX4 0x10DD +#define IXGBE_DEV_ID_82598_CX4_DUAL_PORT 0x10EC +#define IXGBE_DEV_ID_82598_DA_DUAL_PORT 0x10F1 +#define IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM 0x10E1 +#define IXGBE_DEV_ID_82598EB_XF_LR 0x10F4 +#define IXGBE_DEV_ID_82599_KX4 0x10F7 +#define IXGBE_DEV_ID_82599_KX4_MEZZ 0x1514 +#define IXGBE_DEV_ID_82599_KR 0x1517 +#define IXGBE_DEV_ID_82599_COMBO_BACKPLANE 0x10F8 +#define IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ 0x000C +#define IXGBE_DEV_ID_82599_CX4 0x10F9 +#define IXGBE_DEV_ID_82599_SFP 0x10FB +#define IXGBE_SUBDEV_ID_82599_SFP 0x11A9 +#define IXGBE_SUBDEV_ID_82599_RNDC 0x1F72 +#define IXGBE_SUBDEV_ID_82599_560FLR 0x17D0 +#define IXGBE_SUBDEV_ID_82599_ECNA_DP 0x0470 +#define IXGBE_DEV_ID_82599_BACKPLANE_FCOE 0x152A +#define IXGBE_DEV_ID_82599_SFP_FCOE 0x1529 +#define IXGBE_DEV_ID_82599_SFP_EM 0x1507 +#define IXGBE_DEV_ID_82599_SFP_SF2 0x154D +#define IXGBE_DEV_ID_82599_SFP_SF_QP 0x154A +#define IXGBE_DEV_ID_82599_QSFP_SF_QP 0x1558 +#define IXGBE_DEV_ID_82599EN_SFP 0x1557 +#define IXGBE_DEV_ID_82599_XAUI_LOM 0x10FC +#define IXGBE_DEV_ID_82599_T3_LOM 0x151C +#define IXGBE_DEV_ID_82599_LS 0x154F +#define IXGBE_DEV_ID_X540T 0x1528 +#define IXGBE_DEV_ID_X540T1 0x1560 +#define IXGBE_DEV_ID_X550EM_X_SFP 0x15AC +#define IXGBE_DEV_ID_X550EM_X_10G_T 0x15AD +#define IXGBE_DEV_ID_X550EM_X_1G_T 0x15AE +#define IXGBE_DEV_ID_X550T 0x1563 +#define IXGBE_DEV_ID_X550T1 0x15D1 +#define IXGBE_DEV_ID_X550EM_A_KR 0x15C2 +#define IXGBE_DEV_ID_X550EM_A_KR_L 0x15C3 +#define IXGBE_DEV_ID_X550EM_A_SFP_N 0x15C4 +#define IXGBE_DEV_ID_X550EM_A_1G_T 0x15C6 +#define IXGBE_DEV_ID_X550EM_A_1G_T_L 0x15C7 +#define IXGBE_DEV_ID_X550EM_A_10G_T 0x15C8 +#define IXGBE_DEV_ID_X550EM_A_QSFP 0x15CA +#define IXGBE_DEV_ID_X550EM_A_QSFP_N 0x15CC +#define IXGBE_DEV_ID_X550EM_A_SFP 0x15CE +#define IXGBE_DEV_ID_X550EM_X_KX4 0x15AA +#define IXGBE_DEV_ID_X550EM_X_KR 0x15AB + +#ifdef RTE_NIC_BYPASS +#define IXGBE_DEV_ID_82599_BYPASS 0x155D +#endif + +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598_BX) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598AF_DUAL_PORT) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, \ + IXGBE_DEV_ID_82598AF_SINGLE_PORT) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598AT) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598AT2) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598EB_SFP_LOM) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598EB_CX4) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598_CX4_DUAL_PORT) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598_DA_DUAL_PORT) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, \ + IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82598EB_XF_LR) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_KX4) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_KX4_MEZZ) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_KR) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, \ + IXGBE_DEV_ID_82599_COMBO_BACKPLANE) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, \ + IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_CX4) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_SUBDEV_ID_82599_SFP) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_SUBDEV_ID_82599_RNDC) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_SUBDEV_ID_82599_560FLR) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_SUBDEV_ID_82599_ECNA_DP) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_BACKPLANE_FCOE) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP_FCOE) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP_EM) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP_SF2) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_SFP_SF_QP) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_QSFP_SF_QP) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599EN_SFP) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_XAUI_LOM) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_T3_LOM) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_LS) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X540T) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X540T1) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_SFP) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_10G_T) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_1G_T) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550T) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550T1) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_KR) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_KR_L) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_SFP_N) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_1G_T) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_1G_T_L) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_10G_T) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_QSFP) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_QSFP_N) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_SFP) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_KX4) +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_KR) + +#ifdef RTE_NIC_BYPASS +RTE_PCI_DEV_ID_DECL_IXGBE(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_BYPASS) +#endif + +/*************** Physical I40E devices from i40e_type.h *****************/ + +#define I40E_DEV_ID_SFP_XL710 0x1572 +#define I40E_DEV_ID_QEMU 0x1574 +#define I40E_DEV_ID_KX_B 0x1580 +#define I40E_DEV_ID_KX_C 0x1581 +#define I40E_DEV_ID_QSFP_A 0x1583 +#define I40E_DEV_ID_QSFP_B 0x1584 +#define I40E_DEV_ID_QSFP_C 0x1585 +#define I40E_DEV_ID_10G_BASE_T 0x1586 +#define I40E_DEV_ID_20G_KR2 0x1587 +#define I40E_DEV_ID_20G_KR2_A 0x1588 +#define I40E_DEV_ID_10G_BASE_T4 0x1589 +#define I40E_DEV_ID_X722_A0 0x374C +#define I40E_DEV_ID_KX_X722 0x37CE +#define I40E_DEV_ID_QSFP_X722 0x37CF +#define I40E_DEV_ID_SFP_X722 0x37D0 +#define I40E_DEV_ID_1G_BASE_T_X722 0x37D1 +#define I40E_DEV_ID_10G_BASE_T_X722 0x37D2 + +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_SFP_XL710) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_QEMU) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_KX_B) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_KX_C) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_QSFP_A) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_QSFP_B) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_QSFP_C) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_10G_BASE_T) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_20G_KR2) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_20G_KR2_A) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_10G_BASE_T4) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_X722_A0) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_KX_X722) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_QSFP_X722) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_SFP_X722) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_1G_BASE_T_X722) +RTE_PCI_DEV_ID_DECL_I40E(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_10G_BASE_T_X722) + +/*************** Physical FM10K devices from fm10k_type.h ***************/ + +#define FM10K_DEV_ID_PF 0x15A4 +#define FM10K_DEV_ID_SDI_FM10420_QDA2 0x15D0 + +RTE_PCI_DEV_ID_DECL_FM10K(PCI_VENDOR_ID_INTEL, FM10K_DEV_ID_PF) +RTE_PCI_DEV_ID_DECL_FM10K(PCI_VENDOR_ID_INTEL, FM10K_DEV_ID_SDI_FM10420_QDA2) + +/****************** Virtual IGB devices from e1000_hw.h ******************/ + +#define E1000_DEV_ID_82576_VF 0x10CA +#define E1000_DEV_ID_82576_VF_HV 0x152D +#define E1000_DEV_ID_I350_VF 0x1520 +#define E1000_DEV_ID_I350_VF_HV 0x152F + +RTE_PCI_DEV_ID_DECL_IGBVF(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_VF) +RTE_PCI_DEV_ID_DECL_IGBVF(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_82576_VF_HV) +RTE_PCI_DEV_ID_DECL_IGBVF(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_VF) +RTE_PCI_DEV_ID_DECL_IGBVF(PCI_VENDOR_ID_INTEL, E1000_DEV_ID_I350_VF_HV) + +/****************** Virtual IXGBE devices from ixgbe_type.h ******************/ + +#define IXGBE_DEV_ID_82599_VF 0x10ED +#define IXGBE_DEV_ID_82599_VF_HV 0x152E +#define IXGBE_DEV_ID_X540_VF 0x1515 +#define IXGBE_DEV_ID_X540_VF_HV 0x1530 +#define IXGBE_DEV_ID_X550_VF_HV 0x1564 +#define IXGBE_DEV_ID_X550_VF 0x1565 +#define IXGBE_DEV_ID_X550EM_A_VF 0x15C5 +#define IXGBE_DEV_ID_X550EM_A_VF_HV 0x15B4 +#define IXGBE_DEV_ID_X550EM_X_VF 0x15A8 +#define IXGBE_DEV_ID_X550EM_X_VF_HV 0x15A9 + +RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_VF) +RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_82599_VF_HV) +RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X540_VF) +RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X540_VF_HV) +RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550_VF_HV) +RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550_VF) +RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_VF) +RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_A_VF_HV) +RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_VF) +RTE_PCI_DEV_ID_DECL_IXGBEVF(PCI_VENDOR_ID_INTEL, IXGBE_DEV_ID_X550EM_X_VF_HV) + +/****************** Virtual I40E devices from i40e_type.h ********************/ + +#define I40E_DEV_ID_VF 0x154C +#define I40E_DEV_ID_VF_HV 0x1571 +#define I40E_DEV_ID_X722_A0_VF 0x374D +#define I40E_DEV_ID_X722_VF 0x37CD +#define I40E_DEV_ID_X722_VF_HV 0x37D9 + +RTE_PCI_DEV_ID_DECL_I40EVF(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_VF) +RTE_PCI_DEV_ID_DECL_I40EVF(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_VF_HV) +RTE_PCI_DEV_ID_DECL_I40EVF(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_X722_A0_VF) +RTE_PCI_DEV_ID_DECL_I40EVF(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_X722_VF) +RTE_PCI_DEV_ID_DECL_I40EVF(PCI_VENDOR_ID_INTEL, I40E_DEV_ID_X722_VF_HV) + +/****************** Virtio devices from virtio.h ******************/ + +#define QUMRANET_DEV_ID_VIRTIO 0x1000 + +RTE_PCI_DEV_ID_DECL_VIRTIO(PCI_VENDOR_ID_QUMRANET, QUMRANET_DEV_ID_VIRTIO) + +/****************** VMware VMXNET3 devices ******************/ + +#define VMWARE_DEV_ID_VMXNET3 0x07B0 + +RTE_PCI_DEV_ID_DECL_VMXNET3(PCI_VENDOR_ID_VMWARE, VMWARE_DEV_ID_VMXNET3) + +/*************** Virtual FM10K devices from fm10k_type.h ***************/ + +#define FM10K_DEV_ID_VF 0x15A5 + +RTE_PCI_DEV_ID_DECL_FM10KVF(PCI_VENDOR_ID_INTEL, FM10K_DEV_ID_VF) + +/****************** Cisco VIC devices ******************/ + +#define PCI_DEVICE_ID_CISCO_VIC_ENET 0x0043 /* ethernet vnic */ +#define PCI_DEVICE_ID_CISCO_VIC_ENET_VF 0x0071 /* enet SRIOV VF */ + +RTE_PCI_DEV_ID_DECL_ENIC(PCI_VENDOR_ID_CISCO, PCI_DEVICE_ID_CISCO_VIC_ENET) +RTE_PCI_DEV_ID_DECL_ENIC(PCI_VENDOR_ID_CISCO, PCI_DEVICE_ID_CISCO_VIC_ENET_VF) + +/****************** QLogic devices ******************/ + +/* Broadcom/QLogic BNX2X */ +#define BNX2X_DEV_ID_57710 0x164e +#define BNX2X_DEV_ID_57711 0x164f +#define BNX2X_DEV_ID_57711E 0x1650 +#define BNX2X_DEV_ID_57712 0x1662 +#define BNX2X_DEV_ID_57712_MF 0x1663 +#define BNX2X_DEV_ID_57712_VF 0x166f +#define BNX2X_DEV_ID_57713 0x1651 +#define BNX2X_DEV_ID_57713E 0x1652 +#define BNX2X_DEV_ID_57800 0x168a +#define BNX2X_DEV_ID_57800_MF 0x16a5 +#define BNX2X_DEV_ID_57800_VF 0x16a9 +#define BNX2X_DEV_ID_57810 0x168e +#define BNX2X_DEV_ID_57810_MF 0x16ae +#define BNX2X_DEV_ID_57810_VF 0x16af +#define BNX2X_DEV_ID_57811 0x163d +#define BNX2X_DEV_ID_57811_MF 0x163e +#define BNX2X_DEV_ID_57811_VF 0x163f + +#define BNX2X_DEV_ID_57840_OBS 0x168d +#define BNX2X_DEV_ID_57840_OBS_MF 0x16ab +#define BNX2X_DEV_ID_57840_4_10 0x16a1 +#define BNX2X_DEV_ID_57840_2_20 0x16a2 +#define BNX2X_DEV_ID_57840_MF 0x16a4 +#define BNX2X_DEV_ID_57840_VF 0x16ad + +RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57800) +RTE_PCI_DEV_ID_DECL_BNX2XVF(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57800_VF) +RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57711) +RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57810) +RTE_PCI_DEV_ID_DECL_BNX2XVF(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57810_VF) +RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57811) +RTE_PCI_DEV_ID_DECL_BNX2XVF(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57811_VF) +RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57840_OBS) +RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57840_4_10) +RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57840_2_20) +RTE_PCI_DEV_ID_DECL_BNX2XVF(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57840_VF) +#ifdef RTE_LIBRTE_BNX2X_MF_SUPPORT +RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57810_MF) +RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57811_MF) +RTE_PCI_DEV_ID_DECL_BNX2X(PCI_VENDOR_ID_BROADCOM, BNX2X_DEV_ID_57840_MF) +#endif + +/* + * Undef all RTE_PCI_DEV_ID_DECL_* here. + */ +#undef RTE_PCI_DEV_ID_DECL_BNX2X +#undef RTE_PCI_DEV_ID_DECL_BNX2XVF +#undef RTE_PCI_DEV_ID_DECL_EM +#undef RTE_PCI_DEV_ID_DECL_IGB +#undef RTE_PCI_DEV_ID_DECL_IGBVF +#undef RTE_PCI_DEV_ID_DECL_IXGBE +#undef RTE_PCI_DEV_ID_DECL_IXGBEVF +#undef RTE_PCI_DEV_ID_DECL_I40E +#undef RTE_PCI_DEV_ID_DECL_I40EVF +#undef RTE_PCI_DEV_ID_DECL_VIRTIO +#undef RTE_PCI_DEV_ID_DECL_VMXNET3 +#undef RTE_PCI_DEV_ID_DECL_FM10K +#undef RTE_PCI_DEV_ID_DECL_FM10KVF diff --git a/lib/librte_eal/common/include/rte_per_lcore.h b/lib/librte_eal/common/include/rte_per_lcore.h new file mode 100644 index 00000000..5434729a --- /dev/null +++ b/lib/librte_eal/common/include/rte_per_lcore.h @@ -0,0 +1,79 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_PER_LCORE_H_ +#define _RTE_PER_LCORE_H_ + +/** + * @file + * + * Per-lcore variables in RTE + * + * This file defines an API for instantiating per-lcore "global + * variables" that are environment-specific. Note that in all + * environments, a "shared variable" is the default when you use a + * global variable. + * + * Parts of this are execution environment specific. + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include + +/** + * Macro to define a per lcore variable "var" of type "type", don't + * use keywords like "static" or "volatile" in type, just prefix the + * whole macro. + */ +#define RTE_DEFINE_PER_LCORE(type, name) \ + __thread __typeof__(type) per_lcore_##name + +/** + * Macro to declare an extern per lcore variable "var" of type "type" + */ +#define RTE_DECLARE_PER_LCORE(type, name) \ + extern __thread __typeof__(type) per_lcore_##name + +/** + * Read/write the per-lcore variable value + */ +#define RTE_PER_LCORE(name) (per_lcore_##name) + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_PER_LCORE_H_ */ diff --git a/lib/librte_eal/common/include/rte_random.h b/lib/librte_eal/common/include/rte_random.h new file mode 100644 index 00000000..24ae8363 --- /dev/null +++ b/lib/librte_eal/common/include/rte_random.h @@ -0,0 +1,91 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_RANDOM_H_ +#define _RTE_RANDOM_H_ + +/** + * @file + * + * Pseudo-random Generators in RTE + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include + +/** + * Seed the pseudo-random generator. + * + * The generator is automatically seeded by the EAL init with a timer + * value. It may need to be re-seeded by the user with a real random + * value. + * + * @param seedval + * The value of the seed. + */ +static inline void +rte_srand(uint64_t seedval) +{ + srand48((long unsigned int)seedval); +} + +/** + * Get a pseudo-random value. + * + * This function generates pseudo-random numbers using the linear + * congruential algorithm and 48-bit integer arithmetic, called twice + * to generate a 64-bit value. + * + * @return + * A pseudo-random value between 0 and (1<<64)-1. + */ +static inline uint64_t +rte_rand(void) +{ + uint64_t val; + val = lrand48(); + val <<= 32; + val += lrand48(); + return val; +} + +#ifdef __cplusplus +} +#endif + + +#endif /* _RTE_PER_LCORE_H_ */ diff --git a/lib/librte_eal/common/include/rte_string_fns.h b/lib/librte_eal/common/include/rte_string_fns.h new file mode 100644 index 00000000..cfca2f8d --- /dev/null +++ b/lib/librte_eal/common/include/rte_string_fns.h @@ -0,0 +1,81 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * + * String-related functions as replacement for libc equivalents + */ + +#ifndef _RTE_STRING_FNS_H_ +#define _RTE_STRING_FNS_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * Takes string "string" parameter and splits it at character "delim" + * up to maxtokens-1 times - to give "maxtokens" resulting tokens. Like + * strtok or strsep functions, this modifies its input string, by replacing + * instances of "delim" with '\\0'. All resultant tokens are returned in the + * "tokens" array which must have enough entries to hold "maxtokens". + * + * @param string + * The input string to be split into tokens + * + * @param stringlen + * The max length of the input buffer + * + * @param tokens + * The array to hold the pointers to the tokens in the string + * + * @param maxtokens + * The number of elements in the tokens array. At most, maxtokens-1 splits + * of the string will be done. + * + * @param delim + * The character on which the split of the data will be done + * + * @return + * The number of tokens in the tokens array. + */ +int +rte_strsplit(char *string, int stringlen, + char **tokens, int maxtokens, char delim); + +#ifdef __cplusplus +} +#endif + +#endif /* RTE_STRING_FNS_H */ diff --git a/lib/librte_eal/common/include/rte_tailq.h b/lib/librte_eal/common/include/rte_tailq.h new file mode 100644 index 00000000..4a686e68 --- /dev/null +++ b/lib/librte_eal/common/include/rte_tailq.h @@ -0,0 +1,162 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_TAILQ_H_ +#define _RTE_TAILQ_H_ + +/** + * @file + * Here defines rte_tailq APIs for only internal use + * + */ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +/** dummy structure type used by the rte_tailq APIs */ +struct rte_tailq_entry { + TAILQ_ENTRY(rte_tailq_entry) next; /**< Pointer entries for a tailq list */ + void *data; /**< Pointer to the data referenced by this tailq entry */ +}; +/** dummy */ +TAILQ_HEAD(rte_tailq_entry_head, rte_tailq_entry); + +#define RTE_TAILQ_NAMESIZE 32 + +/** + * The structure defining a tailq header entry for storing + * in the rte_config structure in shared memory. Each tailq + * is identified by name. + * Any library storing a set of objects e.g. rings, mempools, hash-tables, + * is recommended to use an entry here, so as to make it easy for + * a multi-process app to find already-created elements in shared memory. + */ +struct rte_tailq_head { + struct rte_tailq_entry_head tailq_head; /**< NOTE: must be first element */ + char name[RTE_TAILQ_NAMESIZE]; +}; + +struct rte_tailq_elem { + /** + * Reference to head in shared mem, updated at init time by + * rte_eal_tailqs_init() + */ + struct rte_tailq_head *head; + TAILQ_ENTRY(rte_tailq_elem) next; + const char name[RTE_TAILQ_NAMESIZE]; +}; + +/** + * Return the first tailq entry casted to the right struct. + */ +#define RTE_TAILQ_CAST(tailq_entry, struct_name) \ + (struct struct_name *)&(tailq_entry)->tailq_head + +/** + * Utility macro to make looking up a tailqueue for a particular struct easier. + * + * @param name + * The name of tailq + * + * @param struct_name + * The name of the list type we are using. (Generally this is the same as the + * first parameter passed to TAILQ_HEAD macro) + * + * @return + * The return value from rte_eal_tailq_lookup, typecast to the appropriate + * structure pointer type. + * NULL on error, since the tailq_head is the first + * element in the rte_tailq_head structure. + */ +#define RTE_TAILQ_LOOKUP(name, struct_name) \ + RTE_TAILQ_CAST(rte_eal_tailq_lookup(name), struct_name) + +/** + * Dump tail queues to the console. + * + * @param f + * A pointer to a file for output + */ +void rte_dump_tailq(FILE *f); + +/** + * Lookup for a tail queue. + * + * Get a pointer to a tail queue header of a tail + * queue identified by the name given as an argument. + * Note: this function is not multi-thread safe, and should only be called from + * a single thread at a time + * + * @param name + * The name of the queue. + * @return + * A pointer to the tail queue head structure. + */ +struct rte_tailq_head *rte_eal_tailq_lookup(const char *name); + +/** + * Register a tail queue. + * + * Register a tail queue from shared memory. + * This function is mainly used by EAL_REGISTER_TAILQ macro which is used to + * register tailq from the different dpdk libraries. Since this macro is a + * constructor, the function has no access to dpdk shared memory, so the + * registered tailq can not be used before call to rte_eal_init() which calls + * rte_eal_tailqs_init(). + * + * @param t + * The tailq element which contains the name of the tailq you want to + * create (/retrieve when in secondary process). + * @return + * 0 on success or -1 in case of an error. + */ +int rte_eal_tailq_register(struct rte_tailq_elem *t); + +#define EAL_REGISTER_TAILQ(t) \ +void tailqinitfn_ ##t(void); \ +void __attribute__((constructor, used)) tailqinitfn_ ##t(void) \ +{ \ + if (rte_eal_tailq_register(&t) < 0) \ + rte_panic("Cannot initialize tailq: %s\n", t.name); \ +} + +#ifdef __cplusplus +} +#endif + +#endif /* _RTE_TAILQ_H_ */ diff --git a/lib/librte_eal/common/include/rte_time.h b/lib/librte_eal/common/include/rte_time.h new file mode 100644 index 00000000..4b13b9c1 --- /dev/null +++ b/lib/librte_eal/common/include/rte_time.h @@ -0,0 +1,122 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#define NSEC_PER_SEC 1000000000L + +/** + * Structure to hold the parameters of a running cycle counter to assist + * in converting cycles to nanoseconds. + */ +struct rte_timecounter { + /** Last cycle counter value read. */ + uint64_t cycle_last; + /** Nanoseconds count. */ + uint64_t nsec; + /** Bitmask separating nanosecond and sub-nanoseconds. */ + uint64_t nsec_mask; + /** Sub-nanoseconds count. */ + uint64_t nsec_frac; + /** Bitmask for two's complement substraction of non-64 bit counters. */ + uint64_t cc_mask; + /** Cycle to nanosecond divisor (power of two). */ + uint32_t cc_shift; +}; + +/** + * Converts cyclecounter cycles to nanoseconds. + */ +static inline uint64_t +rte_cyclecounter_cycles_to_ns(struct rte_timecounter *tc, uint64_t cycles) +{ + uint64_t ns; + + /* Add fractional nanoseconds. */ + ns = cycles + tc->nsec_frac; + tc->nsec_frac = ns & tc->nsec_mask; + + /* Shift to get only nanoseconds. */ + return ns >> tc->cc_shift; +} + +/** + * Update the internal nanosecond count in the structure. + */ +static inline uint64_t +rte_timecounter_update(struct rte_timecounter *tc, uint64_t cycle_now) +{ + uint64_t cycle_delta, ns_offset; + + /* Calculate the delta since the last call. */ + if (tc->cycle_last <= cycle_now) + cycle_delta = (cycle_now - tc->cycle_last) & tc->cc_mask; + else + /* Handle cycle counts that have wrapped around . */ + cycle_delta = (~(tc->cycle_last - cycle_now) & tc->cc_mask) + 1; + + /* Convert to nanoseconds. */ + ns_offset = rte_cyclecounter_cycles_to_ns(tc, cycle_delta); + + /* Store current cycle counter for next call. */ + tc->cycle_last = cycle_now; + + /* Update the nanosecond count. */ + tc->nsec += ns_offset; + + return tc->nsec; +} + +/** + * Convert from timespec structure into nanosecond units. + */ +static inline uint64_t +rte_timespec_to_ns(const struct timespec *ts) +{ + return ((uint64_t) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec; +} + +/** + * Convert from nanosecond units into timespec structure. + */ +static inline struct timespec +rte_ns_to_timespec(uint64_t nsec) +{ + struct timespec ts = {0, 0}; + + if (nsec == 0) + return ts; + + ts.tv_sec = nsec / NSEC_PER_SEC; + ts.tv_nsec = nsec % NSEC_PER_SEC; + + return ts; +} diff --git a/lib/librte_eal/common/include/rte_version.h b/lib/librte_eal/common/include/rte_version.h new file mode 100644 index 00000000..f8ea6d73 --- /dev/null +++ b/lib/librte_eal/common/include/rte_version.h @@ -0,0 +1,130 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * Definitions of DPDK version numbers + */ + +#ifndef _RTE_VERSION_H_ +#define _RTE_VERSION_H_ + +#ifdef __cplusplus +extern "C" { +#endif + +#include +#include +#include + +/** + * String that appears before the version number + */ +#define RTE_VER_PREFIX "DPDK" + +/** + * Major version/year number i.e. the yy in yy.mm.z + */ +#define RTE_VER_YEAR 16 + +/** + * Minor version/month number i.e. the mm in yy.mm.z + */ +#define RTE_VER_MONTH 4 + +/** + * Patch level number i.e. the z in yy.mm.z + */ +#define RTE_VER_MINOR 0 + +/** + * Extra string to be appended to version number + */ +#define RTE_VER_SUFFIX "" + +/** + * Patch release number + * 0-15 = release candidates + * 16 = release + */ +#define RTE_VER_RELEASE 16 + +/** + * Macro to compute a version number usable for comparisons + */ +#define RTE_VERSION_NUM(a,b,c,d) ((a) << 24 | (b) << 16 | (c) << 8 | (d)) + +/** + * All version numbers in one to compare with RTE_VERSION_NUM() + */ +#define RTE_VERSION RTE_VERSION_NUM( \ + RTE_VER_YEAR, \ + RTE_VER_MONTH, \ + RTE_VER_MINOR, \ + RTE_VER_RELEASE) + +/** + * Function returning version string + * @return + * string + */ +static inline const char * +rte_version(void) +{ + static char version[32]; + if (version[0] != 0) + return version; + if (strlen(RTE_VER_SUFFIX) == 0) + snprintf(version, sizeof(version), "%s %d.%02d.%d", + RTE_VER_PREFIX, + RTE_VER_YEAR, + RTE_VER_MONTH, + RTE_VER_MINOR); + else + snprintf(version, sizeof(version), "%s %d.%02d.%d%s%d", + RTE_VER_PREFIX, + RTE_VER_YEAR, + RTE_VER_MONTH, + RTE_VER_MINOR, + RTE_VER_SUFFIX, + RTE_VER_RELEASE < 16 ? + RTE_VER_RELEASE : + RTE_VER_RELEASE - 16); + return version; +} + +#ifdef __cplusplus +} +#endif + +#endif /* RTE_VERSION_H */ diff --git a/lib/librte_eal/common/include/rte_warnings.h b/lib/librte_eal/common/include/rte_warnings.h new file mode 100644 index 00000000..54b545c9 --- /dev/null +++ b/lib/librte_eal/common/include/rte_warnings.h @@ -0,0 +1,84 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +/** + * @file + * Definitions of warnings for use of various insecure functions + */ + +#ifndef _RTE_WARNINGS_H_ +#define _RTE_WARNINGS_H_ + +#ifdef RTE_INSECURE_FUNCTION_WARNING + +/* we need to include all used standard header files so that they appear + * _before_ we poison the function names. + */ + +#include +#include +#include +#include +#include +#ifdef RTE_EXEC_ENV_LINUXAPP +#include +#endif + +/* the following function are deemed not fully secure for use e.g. they + * do not always null-terminate arguments */ +#pragma GCC poison sprintf strtok snprintf vsnprintf +#pragma GCC poison strlen strcpy strcat +#pragma GCC poison sscanf + +/* other unsafe functions may be implemented as macros so just undef them */ +#ifdef strsep +#undef strsep +#else +#pragma GCC poison strsep +#endif + +#ifdef strncpy +#undef strncpy +#else +#pragma GCC poison strncpy +#endif + +#ifdef strncat +#undef strncat +#else +#pragma GCC poison strncat +#endif + +#endif + +#endif /* RTE_WARNINGS_H */ diff --git a/lib/librte_eal/common/malloc_elem.c b/lib/librte_eal/common/malloc_elem.c new file mode 100644 index 00000000..27e9925d --- /dev/null +++ b/lib/librte_eal/common/malloc_elem.c @@ -0,0 +1,344 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "malloc_elem.h" +#include "malloc_heap.h" + +#define MIN_DATA_SIZE (RTE_CACHE_LINE_SIZE) + +/* + * initialise a general malloc_elem header structure + */ +void +malloc_elem_init(struct malloc_elem *elem, + struct malloc_heap *heap, const struct rte_memseg *ms, size_t size) +{ + elem->heap = heap; + elem->ms = ms; + elem->prev = NULL; + memset(&elem->free_list, 0, sizeof(elem->free_list)); + elem->state = ELEM_FREE; + elem->size = size; + elem->pad = 0; + set_header(elem); + set_trailer(elem); +} + +/* + * initialise a dummy malloc_elem header for the end-of-memseg marker + */ +void +malloc_elem_mkend(struct malloc_elem *elem, struct malloc_elem *prev) +{ + malloc_elem_init(elem, prev->heap, prev->ms, 0); + elem->prev = prev; + elem->state = ELEM_BUSY; /* mark busy so its never merged */ +} + +/* + * calculate the starting point of where data of the requested size + * and alignment would fit in the current element. If the data doesn't + * fit, return NULL. + */ +static void * +elem_start_pt(struct malloc_elem *elem, size_t size, unsigned align, + size_t bound) +{ + const size_t bmask = ~(bound - 1); + uintptr_t end_pt = (uintptr_t)elem + + elem->size - MALLOC_ELEM_TRAILER_LEN; + uintptr_t new_data_start = RTE_ALIGN_FLOOR((end_pt - size), align); + uintptr_t new_elem_start; + + /* check boundary */ + if ((new_data_start & bmask) != ((end_pt - 1) & bmask)) { + end_pt = RTE_ALIGN_FLOOR(end_pt, bound); + new_data_start = RTE_ALIGN_FLOOR((end_pt - size), align); + if (((end_pt - 1) & bmask) != (new_data_start & bmask)) + return NULL; + } + + new_elem_start = new_data_start - MALLOC_ELEM_HEADER_LEN; + + /* if the new start point is before the exist start, it won't fit */ + return (new_elem_start < (uintptr_t)elem) ? NULL : (void *)new_elem_start; +} + +/* + * use elem_start_pt to determine if we get meet the size and + * alignment request from the current element + */ +int +malloc_elem_can_hold(struct malloc_elem *elem, size_t size, unsigned align, + size_t bound) +{ + return elem_start_pt(elem, size, align, bound) != NULL; +} + +/* + * split an existing element into two smaller elements at the given + * split_pt parameter. + */ +static void +split_elem(struct malloc_elem *elem, struct malloc_elem *split_pt) +{ + struct malloc_elem *next_elem = RTE_PTR_ADD(elem, elem->size); + const size_t old_elem_size = (uintptr_t)split_pt - (uintptr_t)elem; + const size_t new_elem_size = elem->size - old_elem_size; + + malloc_elem_init(split_pt, elem->heap, elem->ms, new_elem_size); + split_pt->prev = elem; + next_elem->prev = split_pt; + elem->size = old_elem_size; + set_trailer(elem); +} + +/* + * Given an element size, compute its freelist index. + * We free an element into the freelist containing similarly-sized elements. + * We try to allocate elements starting with the freelist containing + * similarly-sized elements, and if necessary, we search freelists + * containing larger elements. + * + * Example element size ranges for a heap with five free lists: + * heap->free_head[0] - (0 , 2^8] + * heap->free_head[1] - (2^8 , 2^10] + * heap->free_head[2] - (2^10 ,2^12] + * heap->free_head[3] - (2^12, 2^14] + * heap->free_head[4] - (2^14, MAX_SIZE] + */ +size_t +malloc_elem_free_list_index(size_t size) +{ +#define MALLOC_MINSIZE_LOG2 8 +#define MALLOC_LOG2_INCREMENT 2 + + size_t log2; + size_t index; + + if (size <= (1UL << MALLOC_MINSIZE_LOG2)) + return 0; + + /* Find next power of 2 >= size. */ + log2 = sizeof(size) * 8 - __builtin_clzl(size-1); + + /* Compute freelist index, based on log2(size). */ + index = (log2 - MALLOC_MINSIZE_LOG2 + MALLOC_LOG2_INCREMENT - 1) / + MALLOC_LOG2_INCREMENT; + + return index <= RTE_HEAP_NUM_FREELISTS-1? + index: RTE_HEAP_NUM_FREELISTS-1; +} + +/* + * Add the specified element to its heap's free list. + */ +void +malloc_elem_free_list_insert(struct malloc_elem *elem) +{ + size_t idx; + + idx = malloc_elem_free_list_index(elem->size - MALLOC_ELEM_HEADER_LEN); + elem->state = ELEM_FREE; + LIST_INSERT_HEAD(&elem->heap->free_head[idx], elem, free_list); +} + +/* + * Remove the specified element from its heap's free list. + */ +static void +elem_free_list_remove(struct malloc_elem *elem) +{ + LIST_REMOVE(elem, free_list); +} + +/* + * reserve a block of data in an existing malloc_elem. If the malloc_elem + * is much larger than the data block requested, we split the element in two. + * This function is only called from malloc_heap_alloc so parameter checking + * is not done here, as it's done there previously. + */ +struct malloc_elem * +malloc_elem_alloc(struct malloc_elem *elem, size_t size, unsigned align, + size_t bound) +{ + struct malloc_elem *new_elem = elem_start_pt(elem, size, align, bound); + const size_t old_elem_size = (uintptr_t)new_elem - (uintptr_t)elem; + const size_t trailer_size = elem->size - old_elem_size - size - + MALLOC_ELEM_OVERHEAD; + + elem_free_list_remove(elem); + + if (trailer_size > MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) { + /* split it, too much free space after elem */ + struct malloc_elem *new_free_elem = + RTE_PTR_ADD(new_elem, size + MALLOC_ELEM_OVERHEAD); + + split_elem(elem, new_free_elem); + malloc_elem_free_list_insert(new_free_elem); + } + + if (old_elem_size < MALLOC_ELEM_OVERHEAD + MIN_DATA_SIZE) { + /* don't split it, pad the element instead */ + elem->state = ELEM_BUSY; + elem->pad = old_elem_size; + + /* put a dummy header in padding, to point to real element header */ + if (elem->pad > 0){ /* pad will be at least 64-bytes, as everything + * is cache-line aligned */ + new_elem->pad = elem->pad; + new_elem->state = ELEM_PAD; + new_elem->size = elem->size - elem->pad; + set_header(new_elem); + } + + return new_elem; + } + + /* we are going to split the element in two. The original element + * remains free, and the new element is the one allocated. + * Re-insert original element, in case its new size makes it + * belong on a different list. + */ + split_elem(elem, new_elem); + new_elem->state = ELEM_BUSY; + malloc_elem_free_list_insert(elem); + + return new_elem; +} + +/* + * joing two struct malloc_elem together. elem1 and elem2 must + * be contiguous in memory. + */ +static inline void +join_elem(struct malloc_elem *elem1, struct malloc_elem *elem2) +{ + struct malloc_elem *next = RTE_PTR_ADD(elem2, elem2->size); + elem1->size += elem2->size; + next->prev = elem1; +} + +/* + * free a malloc_elem block by adding it to the free list. If the + * blocks either immediately before or immediately after newly freed block + * are also free, the blocks are merged together. + */ +int +malloc_elem_free(struct malloc_elem *elem) +{ + if (!malloc_elem_cookies_ok(elem) || elem->state != ELEM_BUSY) + return -1; + + rte_spinlock_lock(&(elem->heap->lock)); + struct malloc_elem *next = RTE_PTR_ADD(elem, elem->size); + if (next->state == ELEM_FREE){ + /* remove from free list, join to this one */ + elem_free_list_remove(next); + join_elem(elem, next); + } + + /* check if previous element is free, if so join with it and return, + * need to re-insert in free list, as that element's size is changing + */ + if (elem->prev != NULL && elem->prev->state == ELEM_FREE) { + elem_free_list_remove(elem->prev); + join_elem(elem->prev, elem); + malloc_elem_free_list_insert(elem->prev); + } + /* otherwise add ourselves to the free list */ + else { + malloc_elem_free_list_insert(elem); + elem->pad = 0; + } + /* decrease heap's count of allocated elements */ + elem->heap->alloc_count--; + rte_spinlock_unlock(&(elem->heap->lock)); + + return 0; +} + +/* + * attempt to resize a malloc_elem by expanding into any free space + * immediately after it in memory. + */ +int +malloc_elem_resize(struct malloc_elem *elem, size_t size) +{ + const size_t new_size = size + MALLOC_ELEM_OVERHEAD; + /* if we request a smaller size, then always return ok */ + const size_t current_size = elem->size - elem->pad; + if (current_size >= new_size) + return 0; + + struct malloc_elem *next = RTE_PTR_ADD(elem, elem->size); + rte_spinlock_lock(&elem->heap->lock); + if (next ->state != ELEM_FREE) + goto err_return; + if (current_size + next->size < new_size) + goto err_return; + + /* we now know the element fits, so remove from free list, + * join the two + */ + elem_free_list_remove(next); + join_elem(elem, next); + + if (elem->size - new_size >= MIN_DATA_SIZE + MALLOC_ELEM_OVERHEAD){ + /* now we have a big block together. Lets cut it down a bit, by splitting */ + struct malloc_elem *split_pt = RTE_PTR_ADD(elem, new_size); + split_pt = RTE_PTR_ALIGN_CEIL(split_pt, RTE_CACHE_LINE_SIZE); + split_elem(elem, split_pt); + malloc_elem_free_list_insert(split_pt); + } + rte_spinlock_unlock(&elem->heap->lock); + return 0; + +err_return: + rte_spinlock_unlock(&elem->heap->lock); + return -1; +} diff --git a/lib/librte_eal/common/malloc_elem.h b/lib/librte_eal/common/malloc_elem.h new file mode 100644 index 00000000..f04b2d1e --- /dev/null +++ b/lib/librte_eal/common/malloc_elem.h @@ -0,0 +1,192 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef MALLOC_ELEM_H_ +#define MALLOC_ELEM_H_ + +#include + +/* dummy definition of struct so we can use pointers to it in malloc_elem struct */ +struct malloc_heap; + +enum elem_state { + ELEM_FREE = 0, + ELEM_BUSY, + ELEM_PAD /* element is a padding-only header */ +}; + +struct malloc_elem { + struct malloc_heap *heap; + struct malloc_elem *volatile prev; /* points to prev elem in memseg */ + LIST_ENTRY(malloc_elem) free_list; /* list of free elements in heap */ + const struct rte_memseg *ms; + volatile enum elem_state state; + uint32_t pad; + size_t size; +#ifdef RTE_LIBRTE_MALLOC_DEBUG + uint64_t header_cookie; /* Cookie marking start of data */ + /* trailer cookie at start + size */ +#endif +} __rte_cache_aligned; + +#ifndef RTE_LIBRTE_MALLOC_DEBUG +static const unsigned MALLOC_ELEM_TRAILER_LEN = 0; + +/* dummy function - just check if pointer is non-null */ +static inline int +malloc_elem_cookies_ok(const struct malloc_elem *elem){ return elem != NULL; } + +/* dummy function - no header if malloc_debug is not enabled */ +static inline void +set_header(struct malloc_elem *elem __rte_unused){ } + +/* dummy function - no trailer if malloc_debug is not enabled */ +static inline void +set_trailer(struct malloc_elem *elem __rte_unused){ } + + +#else +static const unsigned MALLOC_ELEM_TRAILER_LEN = RTE_CACHE_LINE_SIZE; + +#define MALLOC_HEADER_COOKIE 0xbadbadbadadd2e55ULL /**< Header cookie. */ +#define MALLOC_TRAILER_COOKIE 0xadd2e55badbadbadULL /**< Trailer cookie.*/ + +/* define macros to make referencing the header and trailer cookies easier */ +#define MALLOC_ELEM_TRAILER(elem) (*((uint64_t*)RTE_PTR_ADD(elem, \ + elem->size - MALLOC_ELEM_TRAILER_LEN))) +#define MALLOC_ELEM_HEADER(elem) (elem->header_cookie) + +static inline void +set_header(struct malloc_elem *elem) +{ + if (elem != NULL) + MALLOC_ELEM_HEADER(elem) = MALLOC_HEADER_COOKIE; +} + +static inline void +set_trailer(struct malloc_elem *elem) +{ + if (elem != NULL) + MALLOC_ELEM_TRAILER(elem) = MALLOC_TRAILER_COOKIE; +} + +/* check that the header and trailer cookies are set correctly */ +static inline int +malloc_elem_cookies_ok(const struct malloc_elem *elem) +{ + return elem != NULL && + MALLOC_ELEM_HEADER(elem) == MALLOC_HEADER_COOKIE && + MALLOC_ELEM_TRAILER(elem) == MALLOC_TRAILER_COOKIE; +} + +#endif + +static const unsigned MALLOC_ELEM_HEADER_LEN = sizeof(struct malloc_elem); +#define MALLOC_ELEM_OVERHEAD (MALLOC_ELEM_HEADER_LEN + MALLOC_ELEM_TRAILER_LEN) + +/* + * Given a pointer to the start of a memory block returned by malloc, get + * the actual malloc_elem header for that block. + */ +static inline struct malloc_elem * +malloc_elem_from_data(const void *data) +{ + if (data == NULL) + return NULL; + + struct malloc_elem *elem = RTE_PTR_SUB(data, MALLOC_ELEM_HEADER_LEN); + if (!malloc_elem_cookies_ok(elem)) + return NULL; + return elem->state != ELEM_PAD ? elem: RTE_PTR_SUB(elem, elem->pad); +} + +/* + * initialise a malloc_elem header + */ +void +malloc_elem_init(struct malloc_elem *elem, + struct malloc_heap *heap, + const struct rte_memseg *ms, + size_t size); + +/* + * initialise a dummy malloc_elem header for the end-of-memseg marker + */ +void +malloc_elem_mkend(struct malloc_elem *elem, + struct malloc_elem *prev_free); + +/* + * return true if the current malloc_elem can hold a block of data + * of the requested size and with the requested alignment + */ +int +malloc_elem_can_hold(struct malloc_elem *elem, size_t size, + unsigned align, size_t bound); + +/* + * reserve a block of data in an existing malloc_elem. If the malloc_elem + * is much larger than the data block requested, we split the element in two. + */ +struct malloc_elem * +malloc_elem_alloc(struct malloc_elem *elem, size_t size, + unsigned align, size_t bound); + +/* + * free a malloc_elem block by adding it to the free list. If the + * blocks either immediately before or immediately after newly freed block + * are also free, the blocks are merged together. + */ +int +malloc_elem_free(struct malloc_elem *elem); + +/* + * attempt to resize a malloc_elem by expanding into any free space + * immediately after it in memory. + */ +int +malloc_elem_resize(struct malloc_elem *elem, size_t size); + +/* + * Given an element size, compute its freelist index. + */ +size_t +malloc_elem_free_list_index(size_t size); + +/* + * Add element to its heap's free list. + */ +void +malloc_elem_free_list_insert(struct malloc_elem *elem); + +#endif /* MALLOC_ELEM_H_ */ diff --git a/lib/librte_eal/common/malloc_heap.c b/lib/librte_eal/common/malloc_heap.c new file mode 100644 index 00000000..763fa324 --- /dev/null +++ b/lib/librte_eal/common/malloc_heap.c @@ -0,0 +1,236 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "malloc_elem.h" +#include "malloc_heap.h" + +static unsigned +check_hugepage_sz(unsigned flags, uint64_t hugepage_sz) +{ + unsigned check_flag = 0; + + if (!(flags & ~RTE_MEMZONE_SIZE_HINT_ONLY)) + return 1; + + switch (hugepage_sz) { + case RTE_PGSIZE_256K: + check_flag = RTE_MEMZONE_256KB; + break; + case RTE_PGSIZE_2M: + check_flag = RTE_MEMZONE_2MB; + break; + case RTE_PGSIZE_16M: + check_flag = RTE_MEMZONE_16MB; + break; + case RTE_PGSIZE_256M: + check_flag = RTE_MEMZONE_256MB; + break; + case RTE_PGSIZE_512M: + check_flag = RTE_MEMZONE_512MB; + break; + case RTE_PGSIZE_1G: + check_flag = RTE_MEMZONE_1GB; + break; + case RTE_PGSIZE_4G: + check_flag = RTE_MEMZONE_4GB; + break; + case RTE_PGSIZE_16G: + check_flag = RTE_MEMZONE_16GB; + } + + return check_flag & flags; +} + +/* + * Expand the heap with a memseg. + * This reserves the zone and sets a dummy malloc_elem header at the end + * to prevent overflow. The rest of the zone is added to free list as a single + * large free block + */ +static void +malloc_heap_add_memseg(struct malloc_heap *heap, struct rte_memseg *ms) +{ + /* allocate the memory block headers, one at end, one at start */ + struct malloc_elem *start_elem = (struct malloc_elem *)ms->addr; + struct malloc_elem *end_elem = RTE_PTR_ADD(ms->addr, + ms->len - MALLOC_ELEM_OVERHEAD); + end_elem = RTE_PTR_ALIGN_FLOOR(end_elem, RTE_CACHE_LINE_SIZE); + const size_t elem_size = (uintptr_t)end_elem - (uintptr_t)start_elem; + + malloc_elem_init(start_elem, heap, ms, elem_size); + malloc_elem_mkend(end_elem, start_elem); + malloc_elem_free_list_insert(start_elem); + + heap->total_size += elem_size; +} + +/* + * Iterates through the freelist for a heap to find a free element + * which can store data of the required size and with the requested alignment. + * If size is 0, find the biggest available elem. + * Returns null on failure, or pointer to element on success. + */ +static struct malloc_elem * +find_suitable_element(struct malloc_heap *heap, size_t size, + unsigned flags, size_t align, size_t bound) +{ + size_t idx; + struct malloc_elem *elem, *alt_elem = NULL; + + for (idx = malloc_elem_free_list_index(size); + idx < RTE_HEAP_NUM_FREELISTS; idx++) { + for (elem = LIST_FIRST(&heap->free_head[idx]); + !!elem; elem = LIST_NEXT(elem, free_list)) { + if (malloc_elem_can_hold(elem, size, align, bound)) { + if (check_hugepage_sz(flags, elem->ms->hugepage_sz)) + return elem; + if (alt_elem == NULL) + alt_elem = elem; + } + } + } + + if ((alt_elem != NULL) && (flags & RTE_MEMZONE_SIZE_HINT_ONLY)) + return alt_elem; + + return NULL; +} + +/* + * Main function to allocate a block of memory from the heap. + * It locks the free list, scans it, and adds a new memseg if the + * scan fails. Once the new memseg is added, it re-scans and should return + * the new element after releasing the lock. + */ +void * +malloc_heap_alloc(struct malloc_heap *heap, + const char *type __attribute__((unused)), size_t size, unsigned flags, + size_t align, size_t bound) +{ + struct malloc_elem *elem; + + size = RTE_CACHE_LINE_ROUNDUP(size); + align = RTE_CACHE_LINE_ROUNDUP(align); + + rte_spinlock_lock(&heap->lock); + + elem = find_suitable_element(heap, size, flags, align, bound); + if (elem != NULL) { + elem = malloc_elem_alloc(elem, size, align, bound); + /* increase heap's count of allocated elements */ + heap->alloc_count++; + } + rte_spinlock_unlock(&heap->lock); + + return elem == NULL ? NULL : (void *)(&elem[1]); +} + +/* + * Function to retrieve data for heap on given socket + */ +int +malloc_heap_get_stats(const struct malloc_heap *heap, + struct rte_malloc_socket_stats *socket_stats) +{ + size_t idx; + struct malloc_elem *elem; + + /* Initialise variables for heap */ + socket_stats->free_count = 0; + socket_stats->heap_freesz_bytes = 0; + socket_stats->greatest_free_size = 0; + + /* Iterate through free list */ + for (idx = 0; idx < RTE_HEAP_NUM_FREELISTS; idx++) { + for (elem = LIST_FIRST(&heap->free_head[idx]); + !!elem; elem = LIST_NEXT(elem, free_list)) + { + socket_stats->free_count++; + socket_stats->heap_freesz_bytes += elem->size; + if (elem->size > socket_stats->greatest_free_size) + socket_stats->greatest_free_size = elem->size; + } + } + /* Get stats on overall heap and allocated memory on this heap */ + socket_stats->heap_totalsz_bytes = heap->total_size; + socket_stats->heap_allocsz_bytes = (socket_stats->heap_totalsz_bytes - + socket_stats->heap_freesz_bytes); + socket_stats->alloc_count = heap->alloc_count; + return 0; +} + +int +rte_eal_malloc_heap_init(void) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + unsigned ms_cnt; + struct rte_memseg *ms; + + if (mcfg == NULL) + return -1; + + for (ms = &mcfg->memseg[0], ms_cnt = 0; + (ms_cnt < RTE_MAX_MEMSEG) && (ms->len > 0); + ms_cnt++, ms++) { +#ifdef RTE_LIBRTE_IVSHMEM + /* + * if segment has ioremap address set, it's an IVSHMEM segment and + * it is not memory to allocate from. + */ + if (ms->ioremap_addr != 0) + continue; +#endif + malloc_heap_add_memseg(&mcfg->malloc_heaps[ms->socket_id], ms); + } + + return 0; +} diff --git a/lib/librte_eal/common/malloc_heap.h b/lib/librte_eal/common/malloc_heap.h new file mode 100644 index 00000000..3ccbef0f --- /dev/null +++ b/lib/librte_eal/common/malloc_heap.h @@ -0,0 +1,70 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef MALLOC_HEAP_H_ +#define MALLOC_HEAP_H_ + +#include +#include + +#ifdef __cplusplus +extern "C" { +#endif + +static inline unsigned +malloc_get_numa_socket(void) +{ + unsigned socket_id = rte_socket_id(); + + if (socket_id == (unsigned)SOCKET_ID_ANY) + return 0; + + return socket_id; +} + +void * +malloc_heap_alloc(struct malloc_heap *heap, const char *type, size_t size, + unsigned flags, size_t align, size_t bound); + +int +malloc_heap_get_stats(const struct malloc_heap *heap, + struct rte_malloc_socket_stats *socket_stats); + +int +rte_eal_malloc_heap_init(void); + +#ifdef __cplusplus +} +#endif + +#endif /* MALLOC_HEAP_H_ */ diff --git a/lib/librte_eal/common/rte_keepalive.c b/lib/librte_eal/common/rte_keepalive.c new file mode 100644 index 00000000..23363ec1 --- /dev/null +++ b/lib/librte_eal/common/rte_keepalive.c @@ -0,0 +1,149 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2015-2016 Intel Corporation. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include + +#include +#include +#include +#include +#include +#include +#include + +struct rte_keepalive { + /** Core Liveness. */ + enum rte_keepalive_state { + ALIVE = 1, + MISSING = 0, + DEAD = 2, + GONE = 3 + } __rte_cache_aligned state_flags[RTE_KEEPALIVE_MAXCORES]; + + /** Last-seen-alive timestamps */ + uint64_t last_alive[RTE_KEEPALIVE_MAXCORES]; + + /** + * Cores to check. + * Indexed by core id, non-zero if the core should be checked. + */ + uint8_t active_cores[RTE_KEEPALIVE_MAXCORES]; + + /** Dead core handler. */ + rte_keepalive_failure_callback_t callback; + + /** + * Dead core handler app data. + * Pointer is passed to dead core handler. + */ + void *callback_data; + uint64_t tsc_initial; + uint64_t tsc_mhz; +}; + +static void +print_trace(const char *msg, struct rte_keepalive *keepcfg, int idx_core) +{ + RTE_LOG(INFO, EAL, "%sLast seen %" PRId64 "ms ago.\n", + msg, + ((rte_rdtsc() - keepcfg->last_alive[idx_core])*1000) + / rte_get_tsc_hz() + ); +} + +void +rte_keepalive_dispatch_pings(__rte_unused void *ptr_timer, + void *ptr_data) +{ + struct rte_keepalive *keepcfg = ptr_data; + int idx_core; + + for (idx_core = 0; idx_core < RTE_KEEPALIVE_MAXCORES; idx_core++) { + if (keepcfg->active_cores[idx_core] == 0) + continue; + + switch (keepcfg->state_flags[idx_core]) { + case ALIVE: /* Alive */ + keepcfg->state_flags[idx_core] = MISSING; + keepcfg->last_alive[idx_core] = rte_rdtsc(); + break; + case MISSING: /* MIA */ + print_trace("Core MIA. ", keepcfg, idx_core); + keepcfg->state_flags[idx_core] = DEAD; + break; + case DEAD: /* Dead */ + keepcfg->state_flags[idx_core] = GONE; + print_trace("Core died. ", keepcfg, idx_core); + if (keepcfg->callback) + keepcfg->callback( + keepcfg->callback_data, + idx_core + ); + break; + case GONE: /* Buried */ + break; + } + } +} + +struct rte_keepalive * +rte_keepalive_create(rte_keepalive_failure_callback_t callback, + void *data) +{ + struct rte_keepalive *keepcfg; + + keepcfg = rte_zmalloc("RTE_EAL_KEEPALIVE", + sizeof(struct rte_keepalive), + RTE_CACHE_LINE_SIZE); + if (keepcfg != NULL) { + keepcfg->callback = callback; + keepcfg->callback_data = data; + keepcfg->tsc_initial = rte_rdtsc(); + keepcfg->tsc_mhz = rte_get_tsc_hz() / 1000; + } + return keepcfg; +} + +void +rte_keepalive_register_core(struct rte_keepalive *keepcfg, const int id_core) +{ + if (id_core < RTE_KEEPALIVE_MAXCORES) { + keepcfg->active_cores[id_core] = 1; + keepcfg->last_alive[id_core] = rte_rdtsc(); + } +} + +void +rte_keepalive_mark_alive(struct rte_keepalive *keepcfg) +{ + keepcfg->state_flags[rte_lcore_id()] = ALIVE; +} diff --git a/lib/librte_eal/common/rte_malloc.c b/lib/librte_eal/common/rte_malloc.c new file mode 100644 index 00000000..47deb007 --- /dev/null +++ b/lib/librte_eal/common/rte_malloc.c @@ -0,0 +1,262 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "malloc_elem.h" +#include "malloc_heap.h" + + +/* Free the memory space back to heap */ +void rte_free(void *addr) +{ + if (addr == NULL) return; + if (malloc_elem_free(malloc_elem_from_data(addr)) < 0) + rte_panic("Fatal error: Invalid memory\n"); +} + +/* + * Allocate memory on specified heap. + */ +void * +rte_malloc_socket(const char *type, size_t size, unsigned align, int socket_arg) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + int socket, i; + void *ret; + + /* return NULL if size is 0 or alignment is not power-of-2 */ + if (size == 0 || (align && !rte_is_power_of_2(align))) + return NULL; + + if (!rte_eal_has_hugepages()) + socket_arg = SOCKET_ID_ANY; + + if (socket_arg == SOCKET_ID_ANY) + socket = malloc_get_numa_socket(); + else + socket = socket_arg; + + /* Check socket parameter */ + if (socket >= RTE_MAX_NUMA_NODES) + return NULL; + + ret = malloc_heap_alloc(&mcfg->malloc_heaps[socket], type, + size, 0, align == 0 ? 1 : align, 0); + if (ret != NULL || socket_arg != SOCKET_ID_ANY) + return ret; + + /* try other heaps */ + for (i = 0; i < RTE_MAX_NUMA_NODES; i++) { + /* we already tried this one */ + if (i == socket) + continue; + + ret = malloc_heap_alloc(&mcfg->malloc_heaps[i], type, + size, 0, align == 0 ? 1 : align, 0); + if (ret != NULL) + return ret; + } + + return NULL; +} + +/* + * Allocate memory on default heap. + */ +void * +rte_malloc(const char *type, size_t size, unsigned align) +{ + return rte_malloc_socket(type, size, align, SOCKET_ID_ANY); +} + +/* + * Allocate zero'd memory on specified heap. + */ +void * +rte_zmalloc_socket(const char *type, size_t size, unsigned align, int socket) +{ + void *ptr = rte_malloc_socket(type, size, align, socket); + + if (ptr != NULL) + memset(ptr, 0, size); + return ptr; +} + +/* + * Allocate zero'd memory on default heap. + */ +void * +rte_zmalloc(const char *type, size_t size, unsigned align) +{ + return rte_zmalloc_socket(type, size, align, SOCKET_ID_ANY); +} + +/* + * Allocate zero'd memory on specified heap. + */ +void * +rte_calloc_socket(const char *type, size_t num, size_t size, unsigned align, int socket) +{ + return rte_zmalloc_socket(type, num * size, align, socket); +} + +/* + * Allocate zero'd memory on default heap. + */ +void * +rte_calloc(const char *type, size_t num, size_t size, unsigned align) +{ + return rte_zmalloc(type, num * size, align); +} + +/* + * Resize allocated memory. + */ +void * +rte_realloc(void *ptr, size_t size, unsigned align) +{ + if (ptr == NULL) + return rte_malloc(NULL, size, align); + + struct malloc_elem *elem = malloc_elem_from_data(ptr); + if (elem == NULL) + rte_panic("Fatal error: memory corruption detected\n"); + + size = RTE_CACHE_LINE_ROUNDUP(size), align = RTE_CACHE_LINE_ROUNDUP(align); + /* check alignment matches first, and if ok, see if we can resize block */ + if (RTE_PTR_ALIGN(ptr,align) == ptr && + malloc_elem_resize(elem, size) == 0) + return ptr; + + /* either alignment is off, or we have no room to expand, + * so move data. */ + void *new_ptr = rte_malloc(NULL, size, align); + if (new_ptr == NULL) + return NULL; + const unsigned old_size = elem->size - MALLOC_ELEM_OVERHEAD; + rte_memcpy(new_ptr, ptr, old_size < size ? old_size : size); + rte_free(ptr); + + return new_ptr; +} + +int +rte_malloc_validate(const void *ptr, size_t *size) +{ + const struct malloc_elem *elem = malloc_elem_from_data(ptr); + if (!malloc_elem_cookies_ok(elem)) + return -1; + if (size != NULL) + *size = elem->size - elem->pad - MALLOC_ELEM_OVERHEAD; + return 0; +} + +/* + * Function to retrieve data for heap on given socket + */ +int +rte_malloc_get_socket_stats(int socket, + struct rte_malloc_socket_stats *socket_stats) +{ + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + + if (socket >= RTE_MAX_NUMA_NODES || socket < 0) + return -1; + + return malloc_heap_get_stats(&mcfg->malloc_heaps[socket], socket_stats); +} + +/* + * Print stats on memory type. If type is NULL, info on all types is printed + */ +void +rte_malloc_dump_stats(FILE *f, __rte_unused const char *type) +{ + unsigned int socket; + struct rte_malloc_socket_stats sock_stats; + /* Iterate through all initialised heaps */ + for (socket=0; socket< RTE_MAX_NUMA_NODES; socket++) { + if ((rte_malloc_get_socket_stats(socket, &sock_stats) < 0)) + continue; + + fprintf(f, "Socket:%u\n", socket); + fprintf(f, "\tHeap_size:%zu,\n", sock_stats.heap_totalsz_bytes); + fprintf(f, "\tFree_size:%zu,\n", sock_stats.heap_freesz_bytes); + fprintf(f, "\tAlloc_size:%zu,\n", sock_stats.heap_allocsz_bytes); + fprintf(f, "\tGreatest_free_size:%zu,\n", + sock_stats.greatest_free_size); + fprintf(f, "\tAlloc_count:%u,\n",sock_stats.alloc_count); + fprintf(f, "\tFree_count:%u,\n", sock_stats.free_count); + } + return; +} + +/* + * TODO: Set limit to memory that can be allocated to memory type + */ +int +rte_malloc_set_limit(__rte_unused const char *type, + __rte_unused size_t max) +{ + return 0; +} + +/* + * Return the physical address of a virtual address obtained through rte_malloc + */ +phys_addr_t +rte_malloc_virt2phy(const void *addr) +{ + const struct malloc_elem *elem = malloc_elem_from_data(addr); + if (elem == NULL) + return 0; + return elem->ms->phys_addr + ((uintptr_t)addr - (uintptr_t)elem->ms->addr); +} diff --git a/lib/librte_eal/linuxapp/Makefile b/lib/librte_eal/linuxapp/Makefile new file mode 100644 index 00000000..20d2a916 --- /dev/null +++ b/lib/librte_eal/linuxapp/Makefile @@ -0,0 +1,39 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +DIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal +DIRS-$(CONFIG_RTE_EAL_IGB_UIO) += igb_uio +DIRS-$(CONFIG_RTE_KNI_KMOD) += kni +DIRS-$(CONFIG_RTE_LIBRTE_XEN_DOM0) += xen_dom0 + +include $(RTE_SDK)/mk/rte.subdir.mk diff --git a/lib/librte_eal/linuxapp/eal/Makefile b/lib/librte_eal/linuxapp/eal/Makefile new file mode 100644 index 00000000..e1093619 --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/Makefile @@ -0,0 +1,139 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2016 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +LIB = librte_eal.a + +ARCH_DIR ?= $(RTE_ARCH) +EXPORT_MAP := rte_eal_version.map +VPATH += $(RTE_SDK)/lib/librte_eal/common/arch/$(ARCH_DIR) + +LIBABIVER := 2 + +VPATH += $(RTE_SDK)/lib/librte_eal/common + +CFLAGS += -I$(SRCDIR)/include +CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common +CFLAGS += -I$(RTE_SDK)/lib/librte_eal/common/include +CFLAGS += -I$(RTE_SDK)/lib/librte_ring +CFLAGS += -I$(RTE_SDK)/lib/librte_mempool +CFLAGS += -I$(RTE_SDK)/lib/librte_ivshmem +CFLAGS += $(WERROR_FLAGS) -O3 + +LDLIBS += -ldl +LDLIBS += -lpthread +LDLIBS += -lgcc_s +LDLIBS += -lrt + +# specific to linuxapp exec-env +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) := eal.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_hugepage_info.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_memory.c +ifeq ($(CONFIG_RTE_LIBRTE_XEN_DOM0),y) +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_xen_memory.c +endif +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_thread.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_log.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_pci.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_pci_uio.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_pci_vfio.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_pci_vfio_mp_sync.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_debug.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_lcore.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_timer.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_interrupts.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_alarm.c +ifeq ($(CONFIG_RTE_LIBRTE_IVSHMEM),y) +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_ivshmem.c +endif + +# from common dir +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_lcore.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_timer.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_memzone.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_log.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_launch.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_pci.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_pci_uio.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_memory.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_tailqs.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_errno.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_cpuflags.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_string_fns.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_hexdump.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_devargs.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_dev.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_options.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_thread.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += eal_common_proc.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_malloc.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += malloc_elem.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += malloc_heap.c +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_keepalive.c + +# from arch dir +SRCS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += rte_cpuflags.c + +CFLAGS_eal_common_cpuflags.o := $(CPUFLAGS_LIST) + +CFLAGS_eal.o := -D_GNU_SOURCE +CFLAGS_eal_interrupts.o := -D_GNU_SOURCE +CFLAGS_eal_pci_vfio_mp_sync.o := -D_GNU_SOURCE +CFLAGS_eal_timer.o := -D_GNU_SOURCE +CFLAGS_eal_lcore.o := -D_GNU_SOURCE +CFLAGS_eal_thread.o := -D_GNU_SOURCE +CFLAGS_eal_log.o := -D_GNU_SOURCE +CFLAGS_eal_common_log.o := -D_GNU_SOURCE +CFLAGS_eal_hugepage_info.o := -D_GNU_SOURCE +CFLAGS_eal_pci.o := -D_GNU_SOURCE +CFLAGS_eal_pci_uio.o := -D_GNU_SOURCE +CFLAGS_eal_pci_vfio.o := -D_GNU_SOURCE +CFLAGS_eal_common_whitelist.o := -D_GNU_SOURCE +CFLAGS_eal_common_options.o := -D_GNU_SOURCE +CFLAGS_eal_common_thread.o := -D_GNU_SOURCE +CFLAGS_eal_common_lcore.o := -D_GNU_SOURCE + +# workaround for a gcc bug with noreturn attribute +# http://gcc.gnu.org/bugzilla/show_bug.cgi?id=12603 +ifeq ($(CONFIG_RTE_TOOLCHAIN_GCC),y) +CFLAGS_eal_thread.o += -Wno-return-type +endif + +INC := rte_interrupts.h rte_kni_common.h rte_dom0_common.h + +SYMLINK-$(CONFIG_RTE_EXEC_ENV_LINUXAPP)-include/exec-env := \ + $(addprefix include/exec-env/,$(INC)) + +DEPDIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += lib/librte_eal/common +DEPDIRS-$(CONFIG_RTE_EXEC_ENV_LINUXAPP) += lib/librte_eal/common/arch/$(ARCH_DIR) + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/lib/librte_eal/linuxapp/eal/eal.c b/lib/librte_eal/linuxapp/eal/eal.c new file mode 100644 index 00000000..8aafd519 --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/eal.c @@ -0,0 +1,936 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2016 Intel Corporation. All rights reserved. + * Copyright(c) 2012-2014 6WIND S.A. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#if defined(RTE_ARCH_X86) +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "eal_private.h" +#include "eal_thread.h" +#include "eal_internal_cfg.h" +#include "eal_filesystem.h" +#include "eal_hugepages.h" +#include "eal_options.h" + +#define MEMSIZE_IF_NO_HUGE_PAGE (64ULL * 1024ULL * 1024ULL) + +#define SOCKET_MEM_STRLEN (RTE_MAX_NUMA_NODES * 10) + +/* Allow the application to print its usage message too if set */ +static rte_usage_hook_t rte_application_usage_hook = NULL; + +/* early configuration structure, when memory config is not mmapped */ +static struct rte_mem_config early_mem_config; + +/* define fd variable here, because file needs to be kept open for the + * duration of the program, as we hold a write lock on it in the primary proc */ +static int mem_cfg_fd = -1; + +static struct flock wr_lock = { + .l_type = F_WRLCK, + .l_whence = SEEK_SET, + .l_start = offsetof(struct rte_mem_config, memseg), + .l_len = sizeof(early_mem_config.memseg), +}; + +/* Address of global and public configuration */ +static struct rte_config rte_config = { + .mem_config = &early_mem_config, +}; + +/* internal configuration (per-core) */ +struct lcore_config lcore_config[RTE_MAX_LCORE]; + +/* internal configuration */ +struct internal_config internal_config; + +/* used by rte_rdtsc() */ +int rte_cycles_vmware_tsc_map; + +/* Return a pointer to the configuration structure */ +struct rte_config * +rte_eal_get_configuration(void) +{ + return &rte_config; +} + +/* parse a sysfs (or other) file containing one integer value */ +int +eal_parse_sysfs_value(const char *filename, unsigned long *val) +{ + FILE *f; + char buf[BUFSIZ]; + char *end = NULL; + + if ((f = fopen(filename, "r")) == NULL) { + RTE_LOG(ERR, EAL, "%s(): cannot open sysfs value %s\n", + __func__, filename); + return -1; + } + + if (fgets(buf, sizeof(buf), f) == NULL) { + RTE_LOG(ERR, EAL, "%s(): cannot read sysfs value %s\n", + __func__, filename); + fclose(f); + return -1; + } + *val = strtoul(buf, &end, 0); + if ((buf[0] == '\0') || (end == NULL) || (*end != '\n')) { + RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs value %s\n", + __func__, filename); + fclose(f); + return -1; + } + fclose(f); + return 0; +} + + +/* create memory configuration in shared/mmap memory. Take out + * a write lock on the memsegs, so we can auto-detect primary/secondary. + * This means we never close the file while running (auto-close on exit). + * We also don't lock the whole file, so that in future we can use read-locks + * on other parts, e.g. memzones, to detect if there are running secondary + * processes. */ +static void +rte_eal_config_create(void) +{ + void *rte_mem_cfg_addr; + int retval; + + const char *pathname = eal_runtime_config_path(); + + if (internal_config.no_shconf) + return; + + /* map the config before hugepage address so that we don't waste a page */ + if (internal_config.base_virtaddr != 0) + rte_mem_cfg_addr = (void *) + RTE_ALIGN_FLOOR(internal_config.base_virtaddr - + sizeof(struct rte_mem_config), sysconf(_SC_PAGE_SIZE)); + else + rte_mem_cfg_addr = NULL; + + if (mem_cfg_fd < 0){ + mem_cfg_fd = open(pathname, O_RDWR | O_CREAT, 0660); + if (mem_cfg_fd < 0) + rte_panic("Cannot open '%s' for rte_mem_config\n", pathname); + } + + retval = ftruncate(mem_cfg_fd, sizeof(*rte_config.mem_config)); + if (retval < 0){ + close(mem_cfg_fd); + rte_panic("Cannot resize '%s' for rte_mem_config\n", pathname); + } + + retval = fcntl(mem_cfg_fd, F_SETLK, &wr_lock); + if (retval < 0){ + close(mem_cfg_fd); + rte_exit(EXIT_FAILURE, "Cannot create lock on '%s'. Is another primary " + "process running?\n", pathname); + } + + rte_mem_cfg_addr = mmap(rte_mem_cfg_addr, sizeof(*rte_config.mem_config), + PROT_READ | PROT_WRITE, MAP_SHARED, mem_cfg_fd, 0); + + if (rte_mem_cfg_addr == MAP_FAILED){ + rte_panic("Cannot mmap memory for rte_config\n"); + } + memcpy(rte_mem_cfg_addr, &early_mem_config, sizeof(early_mem_config)); + rte_config.mem_config = (struct rte_mem_config *) rte_mem_cfg_addr; + + /* store address of the config in the config itself so that secondary + * processes could later map the config into this exact location */ + rte_config.mem_config->mem_cfg_addr = (uintptr_t) rte_mem_cfg_addr; + +} + +/* attach to an existing shared memory config */ +static void +rte_eal_config_attach(void) +{ + struct rte_mem_config *mem_config; + + const char *pathname = eal_runtime_config_path(); + + if (internal_config.no_shconf) + return; + + if (mem_cfg_fd < 0){ + mem_cfg_fd = open(pathname, O_RDWR); + if (mem_cfg_fd < 0) + rte_panic("Cannot open '%s' for rte_mem_config\n", pathname); + } + + /* map it as read-only first */ + mem_config = (struct rte_mem_config *) mmap(NULL, sizeof(*mem_config), + PROT_READ, MAP_SHARED, mem_cfg_fd, 0); + if (mem_config == MAP_FAILED) + rte_panic("Cannot mmap memory for rte_config\n"); + + rte_config.mem_config = mem_config; +} + +/* reattach the shared config at exact memory location primary process has it */ +static void +rte_eal_config_reattach(void) +{ + struct rte_mem_config *mem_config; + void *rte_mem_cfg_addr; + + if (internal_config.no_shconf) + return; + + /* save the address primary process has mapped shared config to */ + rte_mem_cfg_addr = (void *) (uintptr_t) rte_config.mem_config->mem_cfg_addr; + + /* unmap original config */ + munmap(rte_config.mem_config, sizeof(struct rte_mem_config)); + + /* remap the config at proper address */ + mem_config = (struct rte_mem_config *) mmap(rte_mem_cfg_addr, + sizeof(*mem_config), PROT_READ | PROT_WRITE, MAP_SHARED, + mem_cfg_fd, 0); + close(mem_cfg_fd); + if (mem_config == MAP_FAILED || mem_config != rte_mem_cfg_addr) + rte_panic("Cannot mmap memory for rte_config\n"); + + rte_config.mem_config = mem_config; +} + +/* Detect if we are a primary or a secondary process */ +enum rte_proc_type_t +eal_proc_type_detect(void) +{ + enum rte_proc_type_t ptype = RTE_PROC_PRIMARY; + const char *pathname = eal_runtime_config_path(); + + /* if we can open the file but not get a write-lock we are a secondary + * process. NOTE: if we get a file handle back, we keep that open + * and don't close it to prevent a race condition between multiple opens */ + if (((mem_cfg_fd = open(pathname, O_RDWR)) >= 0) && + (fcntl(mem_cfg_fd, F_SETLK, &wr_lock) < 0)) + ptype = RTE_PROC_SECONDARY; + + RTE_LOG(INFO, EAL, "Auto-detected process type: %s\n", + ptype == RTE_PROC_PRIMARY ? "PRIMARY" : "SECONDARY"); + + return ptype; +} + +/* Sets up rte_config structure with the pointer to shared memory config.*/ +static void +rte_config_init(void) +{ + rte_config.process_type = internal_config.process_type; + + switch (rte_config.process_type){ + case RTE_PROC_PRIMARY: + rte_eal_config_create(); + break; + case RTE_PROC_SECONDARY: + rte_eal_config_attach(); + rte_eal_mcfg_wait_complete(rte_config.mem_config); + rte_eal_config_reattach(); + break; + case RTE_PROC_AUTO: + case RTE_PROC_INVALID: + rte_panic("Invalid process type\n"); + } +} + +/* Unlocks hugepage directories that were locked by eal_hugepage_info_init */ +static void +eal_hugedirs_unlock(void) +{ + int i; + + for (i = 0; i < MAX_HUGEPAGE_SIZES; i++) + { + /* skip uninitialized */ + if (internal_config.hugepage_info[i].lock_descriptor < 0) + continue; + /* unlock hugepage file */ + flock(internal_config.hugepage_info[i].lock_descriptor, LOCK_UN); + close(internal_config.hugepage_info[i].lock_descriptor); + /* reset the field */ + internal_config.hugepage_info[i].lock_descriptor = -1; + } +} + +/* display usage */ +static void +eal_usage(const char *prgname) +{ + printf("\nUsage: %s ", prgname); + eal_common_usage(); + printf("EAL Linux options:\n" + " --"OPT_SOCKET_MEM" Memory to allocate on sockets (comma separated values)\n" + " --"OPT_HUGE_DIR" Directory where hugetlbfs is mounted\n" + " --"OPT_FILE_PREFIX" Prefix for hugepage filenames\n" + " --"OPT_BASE_VIRTADDR" Base virtual address\n" + " --"OPT_CREATE_UIO_DEV" Create /dev/uioX (usually done by hotplug)\n" + " --"OPT_VFIO_INTR" Interrupt mode for VFIO (legacy|msi|msix)\n" + " --"OPT_XEN_DOM0" Support running on Xen dom0 without hugetlbfs\n" + "\n"); + /* Allow the application to print its usage message too if hook is set */ + if ( rte_application_usage_hook ) { + printf("===== Application Usage =====\n\n"); + rte_application_usage_hook(prgname); + } +} + +/* Set a per-application usage message */ +rte_usage_hook_t +rte_set_application_usage_hook( rte_usage_hook_t usage_func ) +{ + rte_usage_hook_t old_func; + + /* Will be NULL on the first call to denote the last usage routine. */ + old_func = rte_application_usage_hook; + rte_application_usage_hook = usage_func; + + return old_func; +} + +static int +eal_parse_socket_mem(char *socket_mem) +{ + char * arg[RTE_MAX_NUMA_NODES]; + char *end; + int arg_num, i, len; + uint64_t total_mem = 0; + + len = strnlen(socket_mem, SOCKET_MEM_STRLEN); + if (len == SOCKET_MEM_STRLEN) { + RTE_LOG(ERR, EAL, "--socket-mem is too long\n"); + return -1; + } + + /* all other error cases will be caught later */ + if (!isdigit(socket_mem[len-1])) + return -1; + + /* split the optarg into separate socket values */ + arg_num = rte_strsplit(socket_mem, len, + arg, RTE_MAX_NUMA_NODES, ','); + + /* if split failed, or 0 arguments */ + if (arg_num <= 0) + return -1; + + internal_config.force_sockets = 1; + + /* parse each defined socket option */ + errno = 0; + for (i = 0; i < arg_num; i++) { + end = NULL; + internal_config.socket_mem[i] = strtoull(arg[i], &end, 10); + + /* check for invalid input */ + if ((errno != 0) || + (arg[i][0] == '\0') || (end == NULL) || (*end != '\0')) + return -1; + internal_config.socket_mem[i] *= 1024ULL; + internal_config.socket_mem[i] *= 1024ULL; + total_mem += internal_config.socket_mem[i]; + } + + /* check if we have a positive amount of total memory */ + if (total_mem == 0) + return -1; + + return 0; +} + +static int +eal_parse_base_virtaddr(const char *arg) +{ + char *end; + uint64_t addr; + + errno = 0; + addr = strtoull(arg, &end, 16); + + /* check for errors */ + if ((errno != 0) || (arg[0] == '\0') || end == NULL || (*end != '\0')) + return -1; + + /* make sure we don't exceed 32-bit boundary on 32-bit target */ +#ifndef RTE_ARCH_64 + if (addr >= UINTPTR_MAX) + return -1; +#endif + + /* align the addr on 16M boundary, 16MB is the minimum huge page + * size on IBM Power architecture. If the addr is aligned to 16MB, + * it can align to 2MB for x86. So this alignment can also be used + * on x86 */ + internal_config.base_virtaddr = + RTE_PTR_ALIGN_CEIL((uintptr_t)addr, (size_t)RTE_PGSIZE_16M); + + return 0; +} + +static int +eal_parse_vfio_intr(const char *mode) +{ + unsigned i; + static struct { + const char *name; + enum rte_intr_mode value; + } map[] = { + { "legacy", RTE_INTR_MODE_LEGACY }, + { "msi", RTE_INTR_MODE_MSI }, + { "msix", RTE_INTR_MODE_MSIX }, + }; + + for (i = 0; i < RTE_DIM(map); i++) { + if (!strcmp(mode, map[i].name)) { + internal_config.vfio_intr_mode = map[i].value; + return 0; + } + } + return -1; +} + +static inline size_t +eal_get_hugepage_mem_size(void) +{ + uint64_t size = 0; + unsigned i, j; + + for (i = 0; i < internal_config.num_hugepage_sizes; i++) { + struct hugepage_info *hpi = &internal_config.hugepage_info[i]; + if (hpi->hugedir != NULL) { + for (j = 0; j < RTE_MAX_NUMA_NODES; j++) { + size += hpi->hugepage_sz * hpi->num_pages[j]; + } + } + } + + return (size < SIZE_MAX) ? (size_t)(size) : SIZE_MAX; +} + +/* Parse the arguments for --log-level only */ +static void +eal_log_level_parse(int argc, char **argv) +{ + int opt; + char **argvopt; + int option_index; + const int old_optind = optind; + const int old_optopt = optopt; + char * const old_optarg = optarg; + + argvopt = argv; + optind = 1; + + eal_reset_internal_config(&internal_config); + + while ((opt = getopt_long(argc, argvopt, eal_short_options, + eal_long_options, &option_index)) != EOF) { + + int ret; + + /* getopt is not happy, stop right now */ + if (opt == '?') + break; + + ret = (opt == OPT_LOG_LEVEL_NUM) ? + eal_parse_common_option(opt, optarg, &internal_config) : 0; + + /* common parser is not happy */ + if (ret < 0) + break; + } + + /* restore getopt lib */ + optind = old_optind; + optopt = old_optopt; + optarg = old_optarg; +} + +/* Parse the argument given in the command line of the application */ +static int +eal_parse_args(int argc, char **argv) +{ + int opt, ret; + char **argvopt; + int option_index; + char *prgname = argv[0]; + const int old_optind = optind; + const int old_optopt = optopt; + char * const old_optarg = optarg; + + argvopt = argv; + optind = 1; + + while ((opt = getopt_long(argc, argvopt, eal_short_options, + eal_long_options, &option_index)) != EOF) { + + /* getopt is not happy, stop right now */ + if (opt == '?') { + eal_usage(prgname); + ret = -1; + goto out; + } + + ret = eal_parse_common_option(opt, optarg, &internal_config); + /* common parser is not happy */ + if (ret < 0) { + eal_usage(prgname); + ret = -1; + goto out; + } + /* common parser handled this option */ + if (ret == 0) + continue; + + switch (opt) { + case 'h': + eal_usage(prgname); + exit(EXIT_SUCCESS); + + /* long options */ + case OPT_XEN_DOM0_NUM: +#ifdef RTE_LIBRTE_XEN_DOM0 + internal_config.xen_dom0_support = 1; +#else + RTE_LOG(ERR, EAL, "Can't support DPDK app " + "running on Dom0, please configure" + " RTE_LIBRTE_XEN_DOM0=y\n"); + ret = -1; + goto out; +#endif + break; + + case OPT_HUGE_DIR_NUM: + internal_config.hugepage_dir = optarg; + break; + + case OPT_FILE_PREFIX_NUM: + internal_config.hugefile_prefix = optarg; + break; + + case OPT_SOCKET_MEM_NUM: + if (eal_parse_socket_mem(optarg) < 0) { + RTE_LOG(ERR, EAL, "invalid parameters for --" + OPT_SOCKET_MEM "\n"); + eal_usage(prgname); + ret = -1; + goto out; + } + break; + + case OPT_BASE_VIRTADDR_NUM: + if (eal_parse_base_virtaddr(optarg) < 0) { + RTE_LOG(ERR, EAL, "invalid parameter for --" + OPT_BASE_VIRTADDR "\n"); + eal_usage(prgname); + ret = -1; + goto out; + } + break; + + case OPT_VFIO_INTR_NUM: + if (eal_parse_vfio_intr(optarg) < 0) { + RTE_LOG(ERR, EAL, "invalid parameters for --" + OPT_VFIO_INTR "\n"); + eal_usage(prgname); + ret = -1; + goto out; + } + break; + + case OPT_CREATE_UIO_DEV_NUM: + internal_config.create_uio_dev = 1; + break; + + default: + if (opt < OPT_LONG_MIN_NUM && isprint(opt)) { + RTE_LOG(ERR, EAL, "Option %c is not supported " + "on Linux\n", opt); + } else if (opt >= OPT_LONG_MIN_NUM && + opt < OPT_LONG_MAX_NUM) { + RTE_LOG(ERR, EAL, "Option %s is not supported " + "on Linux\n", + eal_long_options[option_index].name); + } else { + RTE_LOG(ERR, EAL, "Option %d is not supported " + "on Linux\n", opt); + } + eal_usage(prgname); + ret = -1; + goto out; + } + } + + if (eal_adjust_config(&internal_config) != 0) { + ret = -1; + goto out; + } + + /* sanity checks */ + if (eal_check_common_options(&internal_config) != 0) { + eal_usage(prgname); + ret = -1; + goto out; + } + + /* --xen-dom0 doesn't make sense with --socket-mem */ + if (internal_config.xen_dom0_support && internal_config.force_sockets == 1) { + RTE_LOG(ERR, EAL, "Options --"OPT_SOCKET_MEM" cannot be specified " + "together with --"OPT_XEN_DOM0"\n"); + eal_usage(prgname); + ret = -1; + goto out; + } + + if (optind >= 0) + argv[optind-1] = prgname; + ret = optind-1; + +out: + /* restore getopt lib */ + optind = old_optind; + optopt = old_optopt; + optarg = old_optarg; + + return ret; +} + +static void +eal_check_mem_on_local_socket(void) +{ + const struct rte_memseg *ms; + int i, socket_id; + + socket_id = rte_lcore_to_socket_id(rte_config.master_lcore); + + ms = rte_eal_get_physmem_layout(); + + for (i = 0; i < RTE_MAX_MEMSEG; i++) + if (ms[i].socket_id == socket_id && + ms[i].len > 0) + return; + + RTE_LOG(WARNING, EAL, "WARNING: Master core has no " + "memory on local socket!\n"); +} + +static int +sync_func(__attribute__((unused)) void *arg) +{ + return 0; +} + +inline static void +rte_eal_mcfg_complete(void) +{ + /* ALL shared mem_config related INIT DONE */ + if (rte_config.process_type == RTE_PROC_PRIMARY) + rte_config.mem_config->magic = RTE_MAGIC; +} + +/* + * Request iopl privilege for all RPL, returns 0 on success + * iopl() call is mostly for the i386 architecture. For other architectures, + * return -1 to indicate IO privilege can't be changed in this way. + */ +int +rte_eal_iopl_init(void) +{ +#if defined(RTE_ARCH_X86) + if (iopl(3) != 0) + return -1; + return 0; +#elif defined(RTE_ARCH_ARM) || defined(RTE_ARCH_ARM64) + return 0; /* iopl syscall not supported for ARM/ARM64 */ +#else + return -1; +#endif +} + +/* Launch threads, called at application init(). */ +int +rte_eal_init(int argc, char **argv) +{ + int i, fctret, ret; + pthread_t thread_id; + static rte_atomic32_t run_once = RTE_ATOMIC32_INIT(0); + const char *logid; + char cpuset[RTE_CPU_AFFINITY_STR_LEN]; + char thread_name[RTE_MAX_THREAD_NAME_LEN]; + + if (!rte_atomic32_test_and_set(&run_once)) + return -1; + + logid = strrchr(argv[0], '/'); + logid = strdup(logid ? logid + 1: argv[0]); + + thread_id = pthread_self(); + + if (rte_eal_log_early_init() < 0) + rte_panic("Cannot init early logs\n"); + + eal_log_level_parse(argc, argv); + + /* set log level as early as possible */ + rte_set_log_level(internal_config.log_level); + + if (rte_eal_cpu_init() < 0) + rte_panic("Cannot detect lcores\n"); + + fctret = eal_parse_args(argc, argv); + if (fctret < 0) + exit(1); + + if (internal_config.no_hugetlbfs == 0 && + internal_config.process_type != RTE_PROC_SECONDARY && + internal_config.xen_dom0_support == 0 && + eal_hugepage_info_init() < 0) + rte_panic("Cannot get hugepage information\n"); + + if (internal_config.memory == 0 && internal_config.force_sockets == 0) { + if (internal_config.no_hugetlbfs) + internal_config.memory = MEMSIZE_IF_NO_HUGE_PAGE; + else + internal_config.memory = eal_get_hugepage_mem_size(); + } + + if (internal_config.vmware_tsc_map == 1) { +#ifdef RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT + rte_cycles_vmware_tsc_map = 1; + RTE_LOG (DEBUG, EAL, "Using VMWARE TSC MAP, " + "you must have monitor_control.pseudo_perfctr = TRUE\n"); +#else + RTE_LOG (WARNING, EAL, "Ignoring --vmware-tsc-map because " + "RTE_LIBRTE_EAL_VMWARE_TSC_MAP_SUPPORT is not set\n"); +#endif + } + + rte_srand(rte_rdtsc()); + + rte_config_init(); + + if (rte_eal_pci_init() < 0) + rte_panic("Cannot init PCI\n"); + +#ifdef RTE_LIBRTE_IVSHMEM + if (rte_eal_ivshmem_init() < 0) + rte_panic("Cannot init IVSHMEM\n"); +#endif + + if (rte_eal_memory_init() < 0) + rte_panic("Cannot init memory\n"); + + /* the directories are locked during eal_hugepage_info_init */ + eal_hugedirs_unlock(); + + if (rte_eal_memzone_init() < 0) + rte_panic("Cannot init memzone\n"); + + if (rte_eal_tailqs_init() < 0) + rte_panic("Cannot init tail queues for objects\n"); + +#ifdef RTE_LIBRTE_IVSHMEM + if (rte_eal_ivshmem_obj_init() < 0) + rte_panic("Cannot init IVSHMEM objects\n"); +#endif + + if (rte_eal_log_init(logid, internal_config.syslog_facility) < 0) + rte_panic("Cannot init logs\n"); + + if (rte_eal_alarm_init() < 0) + rte_panic("Cannot init interrupt-handling thread\n"); + + if (rte_eal_timer_init() < 0) + rte_panic("Cannot init HPET or TSC timers\n"); + + eal_check_mem_on_local_socket(); + + if (eal_plugins_init() < 0) + rte_panic("Cannot init plugins\n"); + + eal_thread_init_master(rte_config.master_lcore); + + ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN); + + RTE_LOG(DEBUG, EAL, "Master lcore %u is ready (tid=%x;cpuset=[%s%s])\n", + rte_config.master_lcore, (int)thread_id, cpuset, + ret == 0 ? "" : "..."); + + if (rte_eal_dev_init() < 0) + rte_panic("Cannot init pmd devices\n"); + + if (rte_eal_intr_init() < 0) + rte_panic("Cannot init interrupt-handling thread\n"); + + RTE_LCORE_FOREACH_SLAVE(i) { + + /* + * create communication pipes between master thread + * and children + */ + if (pipe(lcore_config[i].pipe_master2slave) < 0) + rte_panic("Cannot create pipe\n"); + if (pipe(lcore_config[i].pipe_slave2master) < 0) + rte_panic("Cannot create pipe\n"); + + lcore_config[i].state = WAIT; + + /* create a thread for each lcore */ + ret = pthread_create(&lcore_config[i].thread_id, NULL, + eal_thread_loop, NULL); + if (ret != 0) + rte_panic("Cannot create thread\n"); + + /* Set thread_name for aid in debugging. */ + snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, + "lcore-slave-%d", i); + ret = rte_thread_setname(lcore_config[i].thread_id, + thread_name); + if (ret != 0) + RTE_LOG(ERR, EAL, + "Cannot set name for lcore thread\n"); + } + + /* + * Launch a dummy function on all slave lcores, so that master lcore + * knows they are all ready when this function returns. + */ + rte_eal_mp_remote_launch(sync_func, NULL, SKIP_MASTER); + rte_eal_mp_wait_lcore(); + + /* Probe & Initialize PCI devices */ + if (rte_eal_pci_probe()) + rte_panic("Cannot probe PCI\n"); + + rte_eal_mcfg_complete(); + + return fctret; +} + +/* get core role */ +enum rte_lcore_role_t +rte_eal_lcore_role(unsigned lcore_id) +{ + return rte_config.lcore_role[lcore_id]; +} + +enum rte_proc_type_t +rte_eal_process_type(void) +{ + return rte_config.process_type; +} + +int rte_eal_has_hugepages(void) +{ + return ! internal_config.no_hugetlbfs; +} + +int +rte_eal_check_module(const char *module_name) +{ + char sysfs_mod_name[PATH_MAX]; + struct stat st; + int n; + + if (NULL == module_name) + return -1; + + /* Check if there is sysfs mounted */ + if (stat("/sys/module", &st) != 0) { + RTE_LOG(DEBUG, EAL, "sysfs is not mounted! error %i (%s)\n", + errno, strerror(errno)); + return -1; + } + + /* A module might be built-in, therefore try sysfs */ + n = snprintf(sysfs_mod_name, PATH_MAX, "/sys/module/%s", module_name); + if (n < 0 || n > PATH_MAX) { + RTE_LOG(DEBUG, EAL, "Could not format module path\n"); + return -1; + } + + if (stat(sysfs_mod_name, &st) != 0) { + RTE_LOG(DEBUG, EAL, "Module %s not found! error %i (%s)\n", + sysfs_mod_name, errno, strerror(errno)); + return 0; + } + + /* Module has been found */ + return 1; +} diff --git a/lib/librte_eal/linuxapp/eal/eal_alarm.c b/lib/librte_eal/linuxapp/eal/eal_alarm.c new file mode 100644 index 00000000..8b042abc --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/eal_alarm.c @@ -0,0 +1,273 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifndef TFD_NONBLOCK +#include +#define TFD_NONBLOCK O_NONBLOCK +#endif + +#define NS_PER_US 1000 +#define US_PER_MS 1000 +#define MS_PER_S 1000 +#define US_PER_S (US_PER_MS * MS_PER_S) + +#ifdef CLOCK_MONOTONIC_RAW /* Defined in glibc bits/time.h */ +#define CLOCK_TYPE_ID CLOCK_MONOTONIC_RAW +#else +#define CLOCK_TYPE_ID CLOCK_MONOTONIC +#endif + +struct alarm_entry { + LIST_ENTRY(alarm_entry) next; + struct timeval time; + rte_eal_alarm_callback cb_fn; + void *cb_arg; + volatile uint8_t executing; + volatile pthread_t executing_id; +}; + +static LIST_HEAD(alarm_list, alarm_entry) alarm_list = LIST_HEAD_INITIALIZER(); +static rte_spinlock_t alarm_list_lk = RTE_SPINLOCK_INITIALIZER; + +static struct rte_intr_handle intr_handle = {.fd = -1 }; +static int handler_registered = 0; +static void eal_alarm_callback(struct rte_intr_handle *hdl, void *arg); + +int +rte_eal_alarm_init(void) +{ + intr_handle.type = RTE_INTR_HANDLE_ALARM; + /* create a timerfd file descriptor */ + intr_handle.fd = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK); + if (intr_handle.fd == -1) + goto error; + + return 0; + +error: + rte_errno = errno; + return -1; +} + +static void +eal_alarm_callback(struct rte_intr_handle *hdl __rte_unused, + void *arg __rte_unused) +{ + struct timespec now; + struct alarm_entry *ap; + + rte_spinlock_lock(&alarm_list_lk); + while ((ap = LIST_FIRST(&alarm_list)) !=NULL && + clock_gettime(CLOCK_TYPE_ID, &now) == 0 && + (ap->time.tv_sec < now.tv_sec || (ap->time.tv_sec == now.tv_sec && + (ap->time.tv_usec * NS_PER_US) <= now.tv_nsec))) { + ap->executing = 1; + ap->executing_id = pthread_self(); + rte_spinlock_unlock(&alarm_list_lk); + + ap->cb_fn(ap->cb_arg); + + rte_spinlock_lock(&alarm_list_lk); + + LIST_REMOVE(ap, next); + rte_free(ap); + } + + if (!LIST_EMPTY(&alarm_list)) { + struct itimerspec atime = { .it_interval = { 0, 0 } }; + + ap = LIST_FIRST(&alarm_list); + atime.it_value.tv_sec = ap->time.tv_sec; + atime.it_value.tv_nsec = ap->time.tv_usec * NS_PER_US; + /* perform borrow for subtraction if necessary */ + if (now.tv_nsec > (ap->time.tv_usec * NS_PER_US)) + atime.it_value.tv_sec--, atime.it_value.tv_nsec += US_PER_S * NS_PER_US; + + atime.it_value.tv_sec -= now.tv_sec; + atime.it_value.tv_nsec -= now.tv_nsec; + timerfd_settime(intr_handle.fd, 0, &atime, NULL); + } + rte_spinlock_unlock(&alarm_list_lk); +} + +int +rte_eal_alarm_set(uint64_t us, rte_eal_alarm_callback cb_fn, void *cb_arg) +{ + struct timespec now; + int ret = 0; + struct alarm_entry *ap, *new_alarm; + + /* Check parameters, including that us won't cause a uint64_t overflow */ + if (us < 1 || us > (UINT64_MAX - US_PER_S) || cb_fn == NULL) + return -EINVAL; + + new_alarm = rte_zmalloc(NULL, sizeof(*new_alarm), 0); + if (new_alarm == NULL) + return -ENOMEM; + + /* use current time to calculate absolute time of alarm */ + clock_gettime(CLOCK_TYPE_ID, &now); + + new_alarm->cb_fn = cb_fn; + new_alarm->cb_arg = cb_arg; + new_alarm->time.tv_usec = ((now.tv_nsec / NS_PER_US) + us) % US_PER_S; + new_alarm->time.tv_sec = now.tv_sec + (((now.tv_nsec / NS_PER_US) + us) / US_PER_S); + + rte_spinlock_lock(&alarm_list_lk); + if (!handler_registered) { + ret |= rte_intr_callback_register(&intr_handle, + eal_alarm_callback, NULL); + handler_registered = (ret == 0) ? 1 : 0; + } + + if (LIST_EMPTY(&alarm_list)) + LIST_INSERT_HEAD(&alarm_list, new_alarm, next); + else { + LIST_FOREACH(ap, &alarm_list, next) { + if (ap->time.tv_sec > new_alarm->time.tv_sec || + (ap->time.tv_sec == new_alarm->time.tv_sec && + ap->time.tv_usec > new_alarm->time.tv_usec)){ + LIST_INSERT_BEFORE(ap, new_alarm, next); + break; + } + if (LIST_NEXT(ap, next) == NULL) { + LIST_INSERT_AFTER(ap, new_alarm, next); + break; + } + } + } + + if (LIST_FIRST(&alarm_list) == new_alarm) { + struct itimerspec alarm_time = { + .it_interval = {0, 0}, + .it_value = { + .tv_sec = us / US_PER_S, + .tv_nsec = (us % US_PER_S) * NS_PER_US, + }, + }; + ret |= timerfd_settime(intr_handle.fd, 0, &alarm_time, NULL); + } + rte_spinlock_unlock(&alarm_list_lk); + + return ret; +} + +int +rte_eal_alarm_cancel(rte_eal_alarm_callback cb_fn, void *cb_arg) +{ + struct alarm_entry *ap, *ap_prev; + int count = 0; + int err = 0; + int executing; + + if (!cb_fn) { + rte_errno = EINVAL; + return -1; + } + + do { + executing = 0; + rte_spinlock_lock(&alarm_list_lk); + /* remove any matches at the start of the list */ + while ((ap = LIST_FIRST(&alarm_list)) != NULL && + cb_fn == ap->cb_fn && + (cb_arg == (void *)-1 || cb_arg == ap->cb_arg)) { + + if (ap->executing == 0) { + LIST_REMOVE(ap, next); + rte_free(ap); + count++; + } else { + /* If calling from other context, mark that alarm is executing + * so loop can spin till it finish. Otherwise we are trying to + * cancel our self - mark it by EINPROGRESS */ + if (pthread_equal(ap->executing_id, pthread_self()) == 0) + executing++; + else + err = EINPROGRESS; + + break; + } + } + ap_prev = ap; + + /* now go through list, removing entries not at start */ + LIST_FOREACH(ap, &alarm_list, next) { + /* this won't be true first time through */ + if (cb_fn == ap->cb_fn && + (cb_arg == (void *)-1 || cb_arg == ap->cb_arg)) { + + if (ap->executing == 0) { + LIST_REMOVE(ap, next); + rte_free(ap); + count++; + ap = ap_prev; + } else if (pthread_equal(ap->executing_id, pthread_self()) == 0) + executing++; + else + err = EINPROGRESS; + } + ap_prev = ap; + } + rte_spinlock_unlock(&alarm_list_lk); + } while (executing != 0); + + if (count == 0 && err == 0) + rte_errno = ENOENT; + else if (err) + rte_errno = err; + + return count; +} diff --git a/lib/librte_eal/linuxapp/eal/eal_debug.c b/lib/librte_eal/linuxapp/eal/eal_debug.c new file mode 100644 index 00000000..907fbfa7 --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/eal_debug.c @@ -0,0 +1,119 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#define BACKTRACE_SIZE 256 + +/* dump the stack of the calling core */ +void rte_dump_stack(void) +{ + void *func[BACKTRACE_SIZE]; + char **symb = NULL; + int size; + + size = backtrace(func, BACKTRACE_SIZE); + symb = backtrace_symbols(func, size); + + if (symb == NULL) + return; + + while (size > 0) { + rte_log(RTE_LOG_ERR, RTE_LOGTYPE_EAL, + "%d: [%s]\n", size, symb[size - 1]); + size --; + } + + free(symb); +} + +/* not implemented in this environment */ +void rte_dump_registers(void) +{ + return; +} + +/* call abort(), it will generate a coredump if enabled */ +void __rte_panic(const char *funcname, const char *format, ...) +{ + va_list ap; + + /* disable history */ + rte_log_set_history(0); + + rte_log(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, "PANIC in %s():\n", funcname); + va_start(ap, format); + rte_vlog(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, format, ap); + va_end(ap); + rte_dump_stack(); + rte_dump_registers(); + abort(); +} + +/* + * Like rte_panic this terminates the application. However, no traceback is + * provided and no core-dump is generated. + */ +void +rte_exit(int exit_code, const char *format, ...) +{ + va_list ap; + + /* disable history */ + rte_log_set_history(0); + + if (exit_code != 0) + RTE_LOG(CRIT, EAL, "Error - exiting with code: %d\n" + " Cause: ", exit_code); + + va_start(ap, format); + rte_vlog(RTE_LOG_CRIT, RTE_LOGTYPE_EAL, format, ap); + va_end(ap); + +#ifndef RTE_EAL_ALWAYS_PANIC_ON_ERROR + exit(exit_code); +#else + rte_dump_stack(); + rte_dump_registers(); + abort(); +#endif +} diff --git a/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c new file mode 100644 index 00000000..18858e2d --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/eal_hugepage_info.c @@ -0,0 +1,365 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "rte_string_fns.h" +#include "eal_internal_cfg.h" +#include "eal_hugepages.h" +#include "eal_filesystem.h" + +static const char sys_dir_path[] = "/sys/kernel/mm/hugepages"; + +/* this function is only called from eal_hugepage_info_init which itself + * is only called from a primary process */ +static uint32_t +get_num_hugepages(const char *subdir) +{ + char path[PATH_MAX]; + long unsigned resv_pages, num_pages = 0; + const char *nr_hp_file = "free_hugepages"; + const char *nr_rsvd_file = "resv_hugepages"; + + /* first, check how many reserved pages kernel reports */ + snprintf(path, sizeof(path), "%s/%s/%s", + sys_dir_path, subdir, nr_rsvd_file); + if (eal_parse_sysfs_value(path, &resv_pages) < 0) + return 0; + + snprintf(path, sizeof(path), "%s/%s/%s", + sys_dir_path, subdir, nr_hp_file); + if (eal_parse_sysfs_value(path, &num_pages) < 0) + return 0; + + if (num_pages == 0) + RTE_LOG(WARNING, EAL, "No free hugepages reported in %s\n", + subdir); + + /* adjust num_pages */ + if (num_pages >= resv_pages) + num_pages -= resv_pages; + else if (resv_pages) + num_pages = 0; + + /* we want to return a uint32_t and more than this looks suspicious + * anyway ... */ + if (num_pages > UINT32_MAX) + num_pages = UINT32_MAX; + + return num_pages; +} + +static uint64_t +get_default_hp_size(void) +{ + const char proc_meminfo[] = "/proc/meminfo"; + const char str_hugepagesz[] = "Hugepagesize:"; + unsigned hugepagesz_len = sizeof(str_hugepagesz) - 1; + char buffer[256]; + unsigned long long size = 0; + + FILE *fd = fopen(proc_meminfo, "r"); + if (fd == NULL) + rte_panic("Cannot open %s\n", proc_meminfo); + while(fgets(buffer, sizeof(buffer), fd)){ + if (strncmp(buffer, str_hugepagesz, hugepagesz_len) == 0){ + size = rte_str_to_size(&buffer[hugepagesz_len]); + break; + } + } + fclose(fd); + if (size == 0) + rte_panic("Cannot get default hugepage size from %s\n", proc_meminfo); + return size; +} + +static const char * +get_hugepage_dir(uint64_t hugepage_sz) +{ + enum proc_mount_fieldnames { + DEVICE = 0, + MOUNTPT, + FSTYPE, + OPTIONS, + _FIELDNAME_MAX + }; + static uint64_t default_size = 0; + const char proc_mounts[] = "/proc/mounts"; + const char hugetlbfs_str[] = "hugetlbfs"; + const size_t htlbfs_str_len = sizeof(hugetlbfs_str) - 1; + const char pagesize_opt[] = "pagesize="; + const size_t pagesize_opt_len = sizeof(pagesize_opt) - 1; + const char split_tok = ' '; + char *splitstr[_FIELDNAME_MAX]; + char buf[BUFSIZ]; + char *retval = NULL; + + FILE *fd = fopen(proc_mounts, "r"); + if (fd == NULL) + rte_panic("Cannot open %s\n", proc_mounts); + + if (default_size == 0) + default_size = get_default_hp_size(); + + while (fgets(buf, sizeof(buf), fd)){ + if (rte_strsplit(buf, sizeof(buf), splitstr, _FIELDNAME_MAX, + split_tok) != _FIELDNAME_MAX) { + RTE_LOG(ERR, EAL, "Error parsing %s\n", proc_mounts); + break; /* return NULL */ + } + + /* we have a specified --huge-dir option, only examine that dir */ + if (internal_config.hugepage_dir != NULL && + strcmp(splitstr[MOUNTPT], internal_config.hugepage_dir) != 0) + continue; + + if (strncmp(splitstr[FSTYPE], hugetlbfs_str, htlbfs_str_len) == 0){ + const char *pagesz_str = strstr(splitstr[OPTIONS], pagesize_opt); + + /* if no explicit page size, the default page size is compared */ + if (pagesz_str == NULL){ + if (hugepage_sz == default_size){ + retval = strdup(splitstr[MOUNTPT]); + break; + } + } + /* there is an explicit page size, so check it */ + else { + uint64_t pagesz = rte_str_to_size(&pagesz_str[pagesize_opt_len]); + if (pagesz == hugepage_sz) { + retval = strdup(splitstr[MOUNTPT]); + break; + } + } + } /* end if strncmp hugetlbfs */ + } /* end while fgets */ + + fclose(fd); + return retval; +} + +/* + * Clear the hugepage directory of whatever hugepage files + * there are. Checks if the file is locked (i.e. + * if it's in use by another DPDK process). + */ +static int +clear_hugedir(const char * hugedir) +{ + DIR *dir; + struct dirent *dirent; + int dir_fd, fd, lck_result; + const char filter[] = "*map_*"; /* matches hugepage files */ + + /* open directory */ + dir = opendir(hugedir); + if (!dir) { + RTE_LOG(ERR, EAL, "Unable to open hugepage directory %s\n", + hugedir); + goto error; + } + dir_fd = dirfd(dir); + + dirent = readdir(dir); + if (!dirent) { + RTE_LOG(ERR, EAL, "Unable to read hugepage directory %s\n", + hugedir); + goto error; + } + + while(dirent != NULL){ + /* skip files that don't match the hugepage pattern */ + if (fnmatch(filter, dirent->d_name, 0) > 0) { + dirent = readdir(dir); + continue; + } + + /* try and lock the file */ + fd = openat(dir_fd, dirent->d_name, O_RDONLY); + + /* skip to next file */ + if (fd == -1) { + dirent = readdir(dir); + continue; + } + + /* non-blocking lock */ + lck_result = flock(fd, LOCK_EX | LOCK_NB); + + /* if lock succeeds, unlock and remove the file */ + if (lck_result != -1) { + flock(fd, LOCK_UN); + unlinkat(dir_fd, dirent->d_name, 0); + } + close (fd); + dirent = readdir(dir); + } + + closedir(dir); + return 0; + +error: + if (dir) + closedir(dir); + + RTE_LOG(ERR, EAL, "Error while clearing hugepage dir: %s\n", + strerror(errno)); + + return -1; +} + +static int +compare_hpi(const void *a, const void *b) +{ + const struct hugepage_info *hpi_a = a; + const struct hugepage_info *hpi_b = b; + + return hpi_b->hugepage_sz - hpi_a->hugepage_sz; +} + +/* + * when we initialize the hugepage info, everything goes + * to socket 0 by default. it will later get sorted by memory + * initialization procedure. + */ +int +eal_hugepage_info_init(void) +{ + const char dirent_start_text[] = "hugepages-"; + const size_t dirent_start_len = sizeof(dirent_start_text) - 1; + unsigned i, num_sizes = 0; + DIR *dir; + struct dirent *dirent; + + dir = opendir(sys_dir_path); + if (dir == NULL) + rte_panic("Cannot open directory %s to read system hugepage " + "info\n", sys_dir_path); + + for (dirent = readdir(dir); dirent != NULL; dirent = readdir(dir)) { + struct hugepage_info *hpi; + + if (strncmp(dirent->d_name, dirent_start_text, + dirent_start_len) != 0) + continue; + + if (num_sizes >= MAX_HUGEPAGE_SIZES) + break; + + hpi = &internal_config.hugepage_info[num_sizes]; + hpi->hugepage_sz = + rte_str_to_size(&dirent->d_name[dirent_start_len]); + hpi->hugedir = get_hugepage_dir(hpi->hugepage_sz); + + /* first, check if we have a mountpoint */ + if (hpi->hugedir == NULL) { + uint32_t num_pages; + + num_pages = get_num_hugepages(dirent->d_name); + if (num_pages > 0) + RTE_LOG(NOTICE, EAL, + "%" PRIu32 " hugepages of size " + "%" PRIu64 " reserved, but no mounted " + "hugetlbfs found for that size\n", + num_pages, hpi->hugepage_sz); + continue; + } + + /* try to obtain a writelock */ + hpi->lock_descriptor = open(hpi->hugedir, O_RDONLY); + + /* if blocking lock failed */ + if (flock(hpi->lock_descriptor, LOCK_EX) == -1) { + RTE_LOG(CRIT, EAL, + "Failed to lock hugepage directory!\n"); + break; + } + /* clear out the hugepages dir from unused pages */ + if (clear_hugedir(hpi->hugedir) == -1) + break; + + /* for now, put all pages into socket 0, + * later they will be sorted */ + hpi->num_pages[0] = get_num_hugepages(dirent->d_name); + +#ifndef RTE_ARCH_64 + /* for 32-bit systems, limit number of hugepages to + * 1GB per page size */ + hpi->num_pages[0] = RTE_MIN(hpi->num_pages[0], + RTE_PGSIZE_1G / hpi->hugepage_sz); +#endif + + num_sizes++; + } + closedir(dir); + + /* something went wrong, and we broke from the for loop above */ + if (dirent != NULL) + return -1; + + internal_config.num_hugepage_sizes = num_sizes; + + /* sort the page directory entries by size, largest to smallest */ + qsort(&internal_config.hugepage_info[0], num_sizes, + sizeof(internal_config.hugepage_info[0]), compare_hpi); + + /* now we have all info, check we have at least one valid size */ + for (i = 0; i < num_sizes; i++) + if (internal_config.hugepage_info[i].hugedir != NULL && + internal_config.hugepage_info[i].num_pages[0] > 0) + return 0; + + /* no valid hugepage mounts available, return error */ + return -1; +} diff --git a/lib/librte_eal/linuxapp/eal/eal_interrupts.c b/lib/librte_eal/linuxapp/eal/eal_interrupts.c new file mode 100644 index 00000000..06b26a9e --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/eal_interrupts.c @@ -0,0 +1,1224 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "eal_private.h" +#include "eal_vfio.h" +#include "eal_thread.h" + +#define EAL_INTR_EPOLL_WAIT_FOREVER (-1) +#define NB_OTHER_INTR 1 + +static RTE_DEFINE_PER_LCORE(int, _epfd) = -1; /**< epoll fd per thread */ + +/** + * union for pipe fds. + */ +union intr_pipefds{ + struct { + int pipefd[2]; + }; + struct { + int readfd; + int writefd; + }; +}; + +/** + * union buffer for reading on different devices + */ +union rte_intr_read_buffer { + int uio_intr_count; /* for uio device */ +#ifdef VFIO_PRESENT + uint64_t vfio_intr_count; /* for vfio device */ +#endif + uint64_t timerfd_num; /* for timerfd */ + char charbuf[16]; /* for others */ +}; + +TAILQ_HEAD(rte_intr_cb_list, rte_intr_callback); +TAILQ_HEAD(rte_intr_source_list, rte_intr_source); + +struct rte_intr_callback { + TAILQ_ENTRY(rte_intr_callback) next; + rte_intr_callback_fn cb_fn; /**< callback address */ + void *cb_arg; /**< parameter for callback */ +}; + +struct rte_intr_source { + TAILQ_ENTRY(rte_intr_source) next; + struct rte_intr_handle intr_handle; /**< interrupt handle */ + struct rte_intr_cb_list callbacks; /**< user callbacks */ + uint32_t active; +}; + +/* global spinlock for interrupt data operation */ +static rte_spinlock_t intr_lock = RTE_SPINLOCK_INITIALIZER; + +/* union buffer for pipe read/write */ +static union intr_pipefds intr_pipe; + +/* interrupt sources list */ +static struct rte_intr_source_list intr_sources; + +/* interrupt handling thread */ +static pthread_t intr_thread; + +/* VFIO interrupts */ +#ifdef VFIO_PRESENT + +#define IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + sizeof(int)) +/* irq set buffer length for queue interrupts and LSC interrupt */ +#define MSIX_IRQ_SET_BUF_LEN (sizeof(struct vfio_irq_set) + \ + sizeof(int) * (RTE_MAX_RXTX_INTR_VEC_ID + 1)) + +/* enable legacy (INTx) interrupts */ +static int +vfio_enable_intx(struct rte_intr_handle *intr_handle) { + struct vfio_irq_set *irq_set; + char irq_set_buf[IRQ_SET_BUF_LEN]; + int len, ret; + int *fd_ptr; + + len = sizeof(irq_set_buf); + + /* enable INTx */ + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 1; + irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set->start = 0; + fd_ptr = (int *) &irq_set->data; + *fd_ptr = intr_handle->fd; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, "Error enabling INTx interrupts for fd %d\n", + intr_handle->fd); + return -1; + } + + /* unmask INTx after enabling */ + memset(irq_set, 0, len); + len = sizeof(struct vfio_irq_set); + irq_set->argsz = len; + irq_set->count = 1; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK; + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, "Error unmasking INTx interrupts for fd %d\n", + intr_handle->fd); + return -1; + } + return 0; +} + +/* disable legacy (INTx) interrupts */ +static int +vfio_disable_intx(struct rte_intr_handle *intr_handle) { + struct vfio_irq_set *irq_set; + char irq_set_buf[IRQ_SET_BUF_LEN]; + int len, ret; + + len = sizeof(struct vfio_irq_set); + + /* mask interrupts before disabling */ + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 1; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_UNMASK; + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, "Error unmasking INTx interrupts for fd %d\n", + intr_handle->fd); + return -1; + } + + /* disable INTx*/ + memset(irq_set, 0, len); + irq_set->argsz = len; + irq_set->count = 0; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_INTX_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, + "Error disabling INTx interrupts for fd %d\n", intr_handle->fd); + return -1; + } + return 0; +} + +/* enable MSI interrupts */ +static int +vfio_enable_msi(struct rte_intr_handle *intr_handle) { + int len, ret; + char irq_set_buf[IRQ_SET_BUF_LEN]; + struct vfio_irq_set *irq_set; + int *fd_ptr; + + len = sizeof(irq_set_buf); + + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 1; + irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_MSI_IRQ_INDEX; + irq_set->start = 0; + fd_ptr = (int *) &irq_set->data; + *fd_ptr = intr_handle->fd; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, "Error enabling MSI interrupts for fd %d\n", + intr_handle->fd); + return -1; + } + return 0; +} + +/* disable MSI interrupts */ +static int +vfio_disable_msi(struct rte_intr_handle *intr_handle) { + struct vfio_irq_set *irq_set; + char irq_set_buf[IRQ_SET_BUF_LEN]; + int len, ret; + + len = sizeof(struct vfio_irq_set); + + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 0; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_MSI_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) + RTE_LOG(ERR, EAL, + "Error disabling MSI interrupts for fd %d\n", intr_handle->fd); + + return ret; +} + +/* enable MSI-X interrupts */ +static int +vfio_enable_msix(struct rte_intr_handle *intr_handle) { + int len, ret; + char irq_set_buf[MSIX_IRQ_SET_BUF_LEN]; + struct vfio_irq_set *irq_set; + int *fd_ptr; + + len = sizeof(irq_set_buf); + + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + if (!intr_handle->max_intr) + intr_handle->max_intr = 1; + else if (intr_handle->max_intr > RTE_MAX_RXTX_INTR_VEC_ID) + intr_handle->max_intr = RTE_MAX_RXTX_INTR_VEC_ID + 1; + + irq_set->count = intr_handle->max_intr; + irq_set->flags = VFIO_IRQ_SET_DATA_EVENTFD | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; + irq_set->start = 0; + fd_ptr = (int *) &irq_set->data; + /* INTR vector offset 0 reserve for non-efds mapping */ + fd_ptr[RTE_INTR_VEC_ZERO_OFFSET] = intr_handle->fd; + memcpy(&fd_ptr[RTE_INTR_VEC_RXTX_OFFSET], intr_handle->efds, + sizeof(*intr_handle->efds) * intr_handle->nb_efd); + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) { + RTE_LOG(ERR, EAL, "Error enabling MSI-X interrupts for fd %d\n", + intr_handle->fd); + return -1; + } + + return 0; +} + +/* disable MSI-X interrupts */ +static int +vfio_disable_msix(struct rte_intr_handle *intr_handle) { + struct vfio_irq_set *irq_set; + char irq_set_buf[MSIX_IRQ_SET_BUF_LEN]; + int len, ret; + + len = sizeof(struct vfio_irq_set); + + irq_set = (struct vfio_irq_set *) irq_set_buf; + irq_set->argsz = len; + irq_set->count = 0; + irq_set->flags = VFIO_IRQ_SET_DATA_NONE | VFIO_IRQ_SET_ACTION_TRIGGER; + irq_set->index = VFIO_PCI_MSIX_IRQ_INDEX; + irq_set->start = 0; + + ret = ioctl(intr_handle->vfio_dev_fd, VFIO_DEVICE_SET_IRQS, irq_set); + + if (ret) + RTE_LOG(ERR, EAL, + "Error disabling MSI-X interrupts for fd %d\n", intr_handle->fd); + + return ret; +} +#endif + +static int +uio_intx_intr_disable(struct rte_intr_handle *intr_handle) +{ + unsigned char command_high; + + /* use UIO config file descriptor for uio_pci_generic */ + if (pread(intr_handle->uio_cfg_fd, &command_high, 1, 5) != 1) { + RTE_LOG(ERR, EAL, + "Error reading interrupts status for fd %d\n", + intr_handle->uio_cfg_fd); + return -1; + } + /* disable interrupts */ + command_high |= 0x4; + if (pwrite(intr_handle->uio_cfg_fd, &command_high, 1, 5) != 1) { + RTE_LOG(ERR, EAL, + "Error disabling interrupts for fd %d\n", + intr_handle->uio_cfg_fd); + return -1; + } + + return 0; +} + +static int +uio_intx_intr_enable(struct rte_intr_handle *intr_handle) +{ + unsigned char command_high; + + /* use UIO config file descriptor for uio_pci_generic */ + if (pread(intr_handle->uio_cfg_fd, &command_high, 1, 5) != 1) { + RTE_LOG(ERR, EAL, + "Error reading interrupts status for fd %d\n", + intr_handle->uio_cfg_fd); + return -1; + } + /* enable interrupts */ + command_high &= ~0x4; + if (pwrite(intr_handle->uio_cfg_fd, &command_high, 1, 5) != 1) { + RTE_LOG(ERR, EAL, + "Error enabling interrupts for fd %d\n", + intr_handle->uio_cfg_fd); + return -1; + } + + return 0; +} + +static int +uio_intr_disable(struct rte_intr_handle *intr_handle) +{ + const int value = 0; + + if (write(intr_handle->fd, &value, sizeof(value)) < 0) { + RTE_LOG(ERR, EAL, + "Error disabling interrupts for fd %d (%s)\n", + intr_handle->fd, strerror(errno)); + return -1; + } + return 0; +} + +static int +uio_intr_enable(struct rte_intr_handle *intr_handle) +{ + const int value = 1; + + if (write(intr_handle->fd, &value, sizeof(value)) < 0) { + RTE_LOG(ERR, EAL, + "Error enabling interrupts for fd %d (%s)\n", + intr_handle->fd, strerror(errno)); + return -1; + } + return 0; +} + +int +rte_intr_callback_register(struct rte_intr_handle *intr_handle, + rte_intr_callback_fn cb, void *cb_arg) +{ + int ret, wake_thread; + struct rte_intr_source *src; + struct rte_intr_callback *callback; + + wake_thread = 0; + + /* first do parameter checking */ + if (intr_handle == NULL || intr_handle->fd < 0 || cb == NULL) { + RTE_LOG(ERR, EAL, + "Registering with invalid input parameter\n"); + return -EINVAL; + } + + /* allocate a new interrupt callback entity */ + callback = rte_zmalloc("interrupt callback list", + sizeof(*callback), 0); + if (callback == NULL) { + RTE_LOG(ERR, EAL, "Can not allocate memory\n"); + return -ENOMEM; + } + callback->cb_fn = cb; + callback->cb_arg = cb_arg; + + rte_spinlock_lock(&intr_lock); + + /* check if there is at least one callback registered for the fd */ + TAILQ_FOREACH(src, &intr_sources, next) { + if (src->intr_handle.fd == intr_handle->fd) { + /* we had no interrupts for this */ + if TAILQ_EMPTY(&src->callbacks) + wake_thread = 1; + + TAILQ_INSERT_TAIL(&(src->callbacks), callback, next); + ret = 0; + break; + } + } + + /* no existing callbacks for this - add new source */ + if (src == NULL) { + if ((src = rte_zmalloc("interrupt source list", + sizeof(*src), 0)) == NULL) { + RTE_LOG(ERR, EAL, "Can not allocate memory\n"); + rte_free(callback); + ret = -ENOMEM; + } else { + src->intr_handle = *intr_handle; + TAILQ_INIT(&src->callbacks); + TAILQ_INSERT_TAIL(&(src->callbacks), callback, next); + TAILQ_INSERT_TAIL(&intr_sources, src, next); + wake_thread = 1; + ret = 0; + } + } + + rte_spinlock_unlock(&intr_lock); + + /** + * check if need to notify the pipe fd waited by epoll_wait to + * rebuild the wait list. + */ + if (wake_thread) + if (write(intr_pipe.writefd, "1", 1) < 0) + return -EPIPE; + + return ret; +} + +int +rte_intr_callback_unregister(struct rte_intr_handle *intr_handle, + rte_intr_callback_fn cb_fn, void *cb_arg) +{ + int ret; + struct rte_intr_source *src; + struct rte_intr_callback *cb, *next; + + /* do parameter checking first */ + if (intr_handle == NULL || intr_handle->fd < 0) { + RTE_LOG(ERR, EAL, + "Unregistering with invalid input parameter\n"); + return -EINVAL; + } + + rte_spinlock_lock(&intr_lock); + + /* check if the insterrupt source for the fd is existent */ + TAILQ_FOREACH(src, &intr_sources, next) + if (src->intr_handle.fd == intr_handle->fd) + break; + + /* No interrupt source registered for the fd */ + if (src == NULL) { + ret = -ENOENT; + + /* interrupt source has some active callbacks right now. */ + } else if (src->active != 0) { + ret = -EAGAIN; + + /* ok to remove. */ + } else { + ret = 0; + + /*walk through the callbacks and remove all that match. */ + for (cb = TAILQ_FIRST(&src->callbacks); cb != NULL; cb = next) { + + next = TAILQ_NEXT(cb, next); + + if (cb->cb_fn == cb_fn && (cb_arg == (void *)-1 || + cb->cb_arg == cb_arg)) { + TAILQ_REMOVE(&src->callbacks, cb, next); + rte_free(cb); + ret++; + } + } + + /* all callbacks for that source are removed. */ + if (TAILQ_EMPTY(&src->callbacks)) { + TAILQ_REMOVE(&intr_sources, src, next); + rte_free(src); + } + } + + rte_spinlock_unlock(&intr_lock); + + /* notify the pipe fd waited by epoll_wait to rebuild the wait list */ + if (ret >= 0 && write(intr_pipe.writefd, "1", 1) < 0) { + ret = -EPIPE; + } + + return ret; +} + +int +rte_intr_enable(struct rte_intr_handle *intr_handle) +{ + if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0) + return -1; + + switch (intr_handle->type){ + /* write to the uio fd to enable the interrupt */ + case RTE_INTR_HANDLE_UIO: + if (uio_intr_enable(intr_handle)) + return -1; + break; + case RTE_INTR_HANDLE_UIO_INTX: + if (uio_intx_intr_enable(intr_handle)) + return -1; + break; + /* not used at this moment */ + case RTE_INTR_HANDLE_ALARM: + return -1; +#ifdef VFIO_PRESENT + case RTE_INTR_HANDLE_VFIO_MSIX: + if (vfio_enable_msix(intr_handle)) + return -1; + break; + case RTE_INTR_HANDLE_VFIO_MSI: + if (vfio_enable_msi(intr_handle)) + return -1; + break; + case RTE_INTR_HANDLE_VFIO_LEGACY: + if (vfio_enable_intx(intr_handle)) + return -1; + break; +#endif + /* unknown handle type */ + default: + RTE_LOG(ERR, EAL, + "Unknown handle type of fd %d\n", + intr_handle->fd); + return -1; + } + + return 0; +} + +int +rte_intr_disable(struct rte_intr_handle *intr_handle) +{ + if (!intr_handle || intr_handle->fd < 0 || intr_handle->uio_cfg_fd < 0) + return -1; + + switch (intr_handle->type){ + /* write to the uio fd to disable the interrupt */ + case RTE_INTR_HANDLE_UIO: + if (uio_intr_disable(intr_handle)) + return -1; + break; + case RTE_INTR_HANDLE_UIO_INTX: + if (uio_intx_intr_disable(intr_handle)) + return -1; + break; + /* not used at this moment */ + case RTE_INTR_HANDLE_ALARM: + return -1; +#ifdef VFIO_PRESENT + case RTE_INTR_HANDLE_VFIO_MSIX: + if (vfio_disable_msix(intr_handle)) + return -1; + break; + case RTE_INTR_HANDLE_VFIO_MSI: + if (vfio_disable_msi(intr_handle)) + return -1; + break; + case RTE_INTR_HANDLE_VFIO_LEGACY: + if (vfio_disable_intx(intr_handle)) + return -1; + break; +#endif + /* unknown handle type */ + default: + RTE_LOG(ERR, EAL, + "Unknown handle type of fd %d\n", + intr_handle->fd); + return -1; + } + + return 0; +} + +static int +eal_intr_process_interrupts(struct epoll_event *events, int nfds) +{ + int n, bytes_read; + struct rte_intr_source *src; + struct rte_intr_callback *cb; + union rte_intr_read_buffer buf; + struct rte_intr_callback active_cb; + + for (n = 0; n < nfds; n++) { + + /** + * if the pipe fd is ready to read, return out to + * rebuild the wait list. + */ + if (events[n].data.fd == intr_pipe.readfd){ + int r = read(intr_pipe.readfd, buf.charbuf, + sizeof(buf.charbuf)); + RTE_SET_USED(r); + return -1; + } + rte_spinlock_lock(&intr_lock); + TAILQ_FOREACH(src, &intr_sources, next) + if (src->intr_handle.fd == + events[n].data.fd) + break; + if (src == NULL){ + rte_spinlock_unlock(&intr_lock); + continue; + } + + /* mark this interrupt source as active and release the lock. */ + src->active = 1; + rte_spinlock_unlock(&intr_lock); + + /* set the length to be read dor different handle type */ + switch (src->intr_handle.type) { + case RTE_INTR_HANDLE_UIO: + case RTE_INTR_HANDLE_UIO_INTX: + bytes_read = sizeof(buf.uio_intr_count); + break; + case RTE_INTR_HANDLE_ALARM: + bytes_read = sizeof(buf.timerfd_num); + break; +#ifdef VFIO_PRESENT + case RTE_INTR_HANDLE_VFIO_MSIX: + case RTE_INTR_HANDLE_VFIO_MSI: + case RTE_INTR_HANDLE_VFIO_LEGACY: + bytes_read = sizeof(buf.vfio_intr_count); + break; +#endif + case RTE_INTR_HANDLE_EXT: + default: + bytes_read = 1; + break; + } + + if (src->intr_handle.type != RTE_INTR_HANDLE_EXT) { + /** + * read out to clear the ready-to-be-read flag + * for epoll_wait. + */ + bytes_read = read(events[n].data.fd, &buf, bytes_read); + if (bytes_read < 0) { + if (errno == EINTR || errno == EWOULDBLOCK) + continue; + + RTE_LOG(ERR, EAL, "Error reading from file " + "descriptor %d: %s\n", + events[n].data.fd, + strerror(errno)); + } else if (bytes_read == 0) + RTE_LOG(ERR, EAL, "Read nothing from file " + "descriptor %d\n", events[n].data.fd); + } + + /* grab a lock, again to call callbacks and update status. */ + rte_spinlock_lock(&intr_lock); + + if (bytes_read > 0) { + + /* Finally, call all callbacks. */ + TAILQ_FOREACH(cb, &src->callbacks, next) { + + /* make a copy and unlock. */ + active_cb = *cb; + rte_spinlock_unlock(&intr_lock); + + /* call the actual callback */ + active_cb.cb_fn(&src->intr_handle, + active_cb.cb_arg); + + /*get the lock back. */ + rte_spinlock_lock(&intr_lock); + } + } + + /* we done with that interrupt source, release it. */ + src->active = 0; + rte_spinlock_unlock(&intr_lock); + } + + return 0; +} + +/** + * It handles all the interrupts. + * + * @param pfd + * epoll file descriptor. + * @param totalfds + * The number of file descriptors added in epoll. + * + * @return + * void + */ +static void +eal_intr_handle_interrupts(int pfd, unsigned totalfds) +{ + struct epoll_event events[totalfds]; + int nfds = 0; + + for(;;) { + nfds = epoll_wait(pfd, events, totalfds, + EAL_INTR_EPOLL_WAIT_FOREVER); + /* epoll_wait fail */ + if (nfds < 0) { + if (errno == EINTR) + continue; + RTE_LOG(ERR, EAL, + "epoll_wait returns with fail\n"); + return; + } + /* epoll_wait timeout, will never happens here */ + else if (nfds == 0) + continue; + /* epoll_wait has at least one fd ready to read */ + if (eal_intr_process_interrupts(events, nfds) < 0) + return; + } +} + +/** + * It builds/rebuilds up the epoll file descriptor with all the + * file descriptors being waited on. Then handles the interrupts. + * + * @param arg + * pointer. (unused) + * + * @return + * never return; + */ +static __attribute__((noreturn)) void * +eal_intr_thread_main(__rte_unused void *arg) +{ + struct epoll_event ev; + + /* host thread, never break out */ + for (;;) { + /* build up the epoll fd with all descriptors we are to + * wait on then pass it to the handle_interrupts function + */ + static struct epoll_event pipe_event = { + .events = EPOLLIN | EPOLLPRI, + }; + struct rte_intr_source *src; + unsigned numfds = 0; + + /* create epoll fd */ + int pfd = epoll_create(1); + if (pfd < 0) + rte_panic("Cannot create epoll instance\n"); + + pipe_event.data.fd = intr_pipe.readfd; + /** + * add pipe fd into wait list, this pipe is used to + * rebuild the wait list. + */ + if (epoll_ctl(pfd, EPOLL_CTL_ADD, intr_pipe.readfd, + &pipe_event) < 0) { + rte_panic("Error adding fd to %d epoll_ctl, %s\n", + intr_pipe.readfd, strerror(errno)); + } + numfds++; + + rte_spinlock_lock(&intr_lock); + + TAILQ_FOREACH(src, &intr_sources, next) { + if (src->callbacks.tqh_first == NULL) + continue; /* skip those with no callbacks */ + ev.events = EPOLLIN | EPOLLPRI; + ev.data.fd = src->intr_handle.fd; + + /** + * add all the uio device file descriptor + * into wait list. + */ + if (epoll_ctl(pfd, EPOLL_CTL_ADD, + src->intr_handle.fd, &ev) < 0){ + rte_panic("Error adding fd %d epoll_ctl, %s\n", + src->intr_handle.fd, strerror(errno)); + } + else + numfds++; + } + rte_spinlock_unlock(&intr_lock); + /* serve the interrupt */ + eal_intr_handle_interrupts(pfd, numfds); + + /** + * when we return, we need to rebuild the + * list of fds to monitor. + */ + close(pfd); + } +} + +int +rte_eal_intr_init(void) +{ + int ret = 0, ret_1 = 0; + char thread_name[RTE_MAX_THREAD_NAME_LEN]; + + /* init the global interrupt source head */ + TAILQ_INIT(&intr_sources); + + /** + * create a pipe which will be waited by epoll and notified to + * rebuild the wait list of epoll. + */ + if (pipe(intr_pipe.pipefd) < 0) + return -1; + + /* create the host thread to wait/handle the interrupt */ + ret = pthread_create(&intr_thread, NULL, + eal_intr_thread_main, NULL); + if (ret != 0) { + RTE_LOG(ERR, EAL, + "Failed to create thread for interrupt handling\n"); + } else { + /* Set thread_name for aid in debugging. */ + snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, + "eal-intr-thread"); + ret_1 = rte_thread_setname(intr_thread, thread_name); + if (ret_1 != 0) + RTE_LOG(ERR, EAL, + "Failed to set thread name for interrupt handling\n"); + } + + return -ret; +} + +static void +eal_intr_proc_rxtx_intr(int fd, const struct rte_intr_handle *intr_handle) +{ + union rte_intr_read_buffer buf; + int bytes_read = 1; + int nbytes; + + switch (intr_handle->type) { + case RTE_INTR_HANDLE_UIO: + case RTE_INTR_HANDLE_UIO_INTX: + bytes_read = sizeof(buf.uio_intr_count); + break; +#ifdef VFIO_PRESENT + case RTE_INTR_HANDLE_VFIO_MSIX: + case RTE_INTR_HANDLE_VFIO_MSI: + case RTE_INTR_HANDLE_VFIO_LEGACY: + bytes_read = sizeof(buf.vfio_intr_count); + break; +#endif + default: + bytes_read = 1; + RTE_LOG(INFO, EAL, "unexpected intr type\n"); + break; + } + + /** + * read out to clear the ready-to-be-read flag + * for epoll_wait. + */ + do { + nbytes = read(fd, &buf, bytes_read); + if (nbytes < 0) { + if (errno == EINTR || errno == EWOULDBLOCK || + errno == EAGAIN) + continue; + RTE_LOG(ERR, EAL, + "Error reading from fd %d: %s\n", + fd, strerror(errno)); + } else if (nbytes == 0) + RTE_LOG(ERR, EAL, "Read nothing from fd %d\n", fd); + return; + } while (1); +} + +static int +eal_epoll_process_event(struct epoll_event *evs, unsigned int n, + struct rte_epoll_event *events) +{ + unsigned int i, count = 0; + struct rte_epoll_event *rev; + + for (i = 0; i < n; i++) { + rev = evs[i].data.ptr; + if (!rev || !rte_atomic32_cmpset(&rev->status, RTE_EPOLL_VALID, + RTE_EPOLL_EXEC)) + continue; + + events[count].status = RTE_EPOLL_VALID; + events[count].fd = rev->fd; + events[count].epfd = rev->epfd; + events[count].epdata.event = rev->epdata.event; + events[count].epdata.data = rev->epdata.data; + if (rev->epdata.cb_fun) + rev->epdata.cb_fun(rev->fd, + rev->epdata.cb_arg); + + rte_compiler_barrier(); + rev->status = RTE_EPOLL_VALID; + count++; + } + return count; +} + +static inline int +eal_init_tls_epfd(void) +{ + int pfd = epoll_create(255); + + if (pfd < 0) { + RTE_LOG(ERR, EAL, + "Cannot create epoll instance\n"); + return -1; + } + return pfd; +} + +int +rte_intr_tls_epfd(void) +{ + if (RTE_PER_LCORE(_epfd) == -1) + RTE_PER_LCORE(_epfd) = eal_init_tls_epfd(); + + return RTE_PER_LCORE(_epfd); +} + +int +rte_epoll_wait(int epfd, struct rte_epoll_event *events, + int maxevents, int timeout) +{ + struct epoll_event evs[maxevents]; + int rc; + + if (!events) { + RTE_LOG(ERR, EAL, "rte_epoll_event can't be NULL\n"); + return -1; + } + + /* using per thread epoll fd */ + if (epfd == RTE_EPOLL_PER_THREAD) + epfd = rte_intr_tls_epfd(); + + while (1) { + rc = epoll_wait(epfd, evs, maxevents, timeout); + if (likely(rc > 0)) { + /* epoll_wait has at least one fd ready to read */ + rc = eal_epoll_process_event(evs, rc, events); + break; + } else if (rc < 0) { + if (errno == EINTR) + continue; + /* epoll_wait fail */ + RTE_LOG(ERR, EAL, "epoll_wait returns with fail %s\n", + strerror(errno)); + rc = -1; + break; + } else { + /* rc == 0, epoll_wait timed out */ + break; + } + } + + return rc; +} + +static inline void +eal_epoll_data_safe_free(struct rte_epoll_event *ev) +{ + while (!rte_atomic32_cmpset(&ev->status, RTE_EPOLL_VALID, + RTE_EPOLL_INVALID)) + while (ev->status != RTE_EPOLL_VALID) + rte_pause(); + memset(&ev->epdata, 0, sizeof(ev->epdata)); + ev->fd = -1; + ev->epfd = -1; +} + +int +rte_epoll_ctl(int epfd, int op, int fd, + struct rte_epoll_event *event) +{ + struct epoll_event ev; + + if (!event) { + RTE_LOG(ERR, EAL, "rte_epoll_event can't be NULL\n"); + return -1; + } + + /* using per thread epoll fd */ + if (epfd == RTE_EPOLL_PER_THREAD) + epfd = rte_intr_tls_epfd(); + + if (op == EPOLL_CTL_ADD) { + event->status = RTE_EPOLL_VALID; + event->fd = fd; /* ignore fd in event */ + event->epfd = epfd; + ev.data.ptr = (void *)event; + } + + ev.events = event->epdata.event; + if (epoll_ctl(epfd, op, fd, &ev) < 0) { + RTE_LOG(ERR, EAL, "Error op %d fd %d epoll_ctl, %s\n", + op, fd, strerror(errno)); + if (op == EPOLL_CTL_ADD) + /* rollback status when CTL_ADD fail */ + event->status = RTE_EPOLL_INVALID; + return -1; + } + + if (op == EPOLL_CTL_DEL && event->status != RTE_EPOLL_INVALID) + eal_epoll_data_safe_free(event); + + return 0; +} + +int +rte_intr_rx_ctl(struct rte_intr_handle *intr_handle, int epfd, + int op, unsigned int vec, void *data) +{ + struct rte_epoll_event *rev; + struct rte_epoll_data *epdata; + int epfd_op; + unsigned int efd_idx; + int rc = 0; + + efd_idx = (vec >= RTE_INTR_VEC_RXTX_OFFSET) ? + (vec - RTE_INTR_VEC_RXTX_OFFSET) : vec; + + if (!intr_handle || intr_handle->nb_efd == 0 || + efd_idx >= intr_handle->nb_efd) { + RTE_LOG(ERR, EAL, "Wrong intr vector number.\n"); + return -EPERM; + } + + switch (op) { + case RTE_INTR_EVENT_ADD: + epfd_op = EPOLL_CTL_ADD; + rev = &intr_handle->elist[efd_idx]; + if (rev->status != RTE_EPOLL_INVALID) { + RTE_LOG(INFO, EAL, "Event already been added.\n"); + return -EEXIST; + } + + /* attach to intr vector fd */ + epdata = &rev->epdata; + epdata->event = EPOLLIN | EPOLLPRI | EPOLLET; + epdata->data = data; + epdata->cb_fun = (rte_intr_event_cb_t)eal_intr_proc_rxtx_intr; + epdata->cb_arg = (void *)intr_handle; + rc = rte_epoll_ctl(epfd, epfd_op, + intr_handle->efds[efd_idx], rev); + if (!rc) + RTE_LOG(DEBUG, EAL, + "efd %d associated with vec %d added on epfd %d" + "\n", rev->fd, vec, epfd); + else + rc = -EPERM; + break; + case RTE_INTR_EVENT_DEL: + epfd_op = EPOLL_CTL_DEL; + rev = &intr_handle->elist[efd_idx]; + if (rev->status == RTE_EPOLL_INVALID) { + RTE_LOG(INFO, EAL, "Event does not exist.\n"); + return -EPERM; + } + + rc = rte_epoll_ctl(rev->epfd, epfd_op, rev->fd, rev); + if (rc) + rc = -EPERM; + break; + default: + RTE_LOG(ERR, EAL, "event op type mismatch\n"); + rc = -EPERM; + } + + return rc; +} + +int +rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd) +{ + uint32_t i; + int fd; + uint32_t n = RTE_MIN(nb_efd, (uint32_t)RTE_MAX_RXTX_INTR_VEC_ID); + + assert(nb_efd != 0); + + if (intr_handle->type == RTE_INTR_HANDLE_VFIO_MSIX) { + for (i = 0; i < n; i++) { + fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); + if (fd < 0) { + RTE_LOG(ERR, EAL, + "can't setup eventfd, error %i (%s)\n", + errno, strerror(errno)); + return -1; + } + intr_handle->efds[i] = fd; + } + intr_handle->nb_efd = n; + intr_handle->max_intr = NB_OTHER_INTR + n; + } else { + intr_handle->efds[0] = intr_handle->fd; + intr_handle->nb_efd = RTE_MIN(nb_efd, 1U); + intr_handle->max_intr = NB_OTHER_INTR; + } + + return 0; +} + +void +rte_intr_efd_disable(struct rte_intr_handle *intr_handle) +{ + uint32_t i; + struct rte_epoll_event *rev; + + for (i = 0; i < intr_handle->nb_efd; i++) { + rev = &intr_handle->elist[i]; + if (rev->status == RTE_EPOLL_INVALID) + continue; + if (rte_epoll_ctl(rev->epfd, EPOLL_CTL_DEL, rev->fd, rev)) { + /* force free if the entry valid */ + eal_epoll_data_safe_free(rev); + rev->status = RTE_EPOLL_INVALID; + } + } + + if (intr_handle->max_intr > intr_handle->nb_efd) { + for (i = 0; i < intr_handle->nb_efd; i++) + close(intr_handle->efds[i]); + } + intr_handle->nb_efd = 0; + intr_handle->max_intr = 0; +} + +int +rte_intr_dp_is_en(struct rte_intr_handle *intr_handle) +{ + return !(!intr_handle->nb_efd); +} + +int +rte_intr_allow_others(struct rte_intr_handle *intr_handle) +{ + if (!rte_intr_dp_is_en(intr_handle)) + return 1; + else + return !!(intr_handle->max_intr - intr_handle->nb_efd); +} + +int +rte_intr_cap_multiple(struct rte_intr_handle *intr_handle) +{ + if (intr_handle->type == RTE_INTR_HANDLE_VFIO_MSIX) + return 1; + + return 0; +} diff --git a/lib/librte_eal/linuxapp/eal/eal_ivshmem.c b/lib/librte_eal/linuxapp/eal/eal_ivshmem.c new file mode 100644 index 00000000..07aec694 --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/eal_ivshmem.c @@ -0,0 +1,955 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifdef RTE_LIBRTE_IVSHMEM /* hide it from coverage */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "eal_internal_cfg.h" +#include "eal_private.h" + +#define PCI_VENDOR_ID_IVSHMEM 0x1Af4 +#define PCI_DEVICE_ID_IVSHMEM 0x1110 + +#define IVSHMEM_MAGIC 0x0BADC0DE + +#define IVSHMEM_RESOURCE_PATH "/sys/bus/pci/devices/%04x:%02x:%02x.%x/resource2" +#define IVSHMEM_CONFIG_PATH "/var/run/.%s_ivshmem_config" + +#define PHYS 0x1 +#define VIRT 0x2 +#define IOREMAP 0x4 +#define FULL (PHYS|VIRT|IOREMAP) + +#define METADATA_SIZE_ALIGNED \ + (RTE_ALIGN_CEIL(sizeof(struct rte_ivshmem_metadata),pagesz)) + +#define CONTAINS(x,y)\ + (((y).addr_64 >= (x).addr_64) && ((y).addr_64 < (x).addr_64 + (x).len)) + +#define DIM(x) (sizeof(x)/sizeof(x[0])) + +struct ivshmem_pci_device { + char path[PATH_MAX]; + phys_addr_t ioremap_addr; +}; + +/* data type to store in config */ +struct ivshmem_segment { + struct rte_ivshmem_metadata_entry entry; + uint64_t align; + char path[PATH_MAX]; +}; +struct ivshmem_shared_config { + struct ivshmem_segment segment[RTE_MAX_MEMSEG]; + uint32_t segment_idx; + struct ivshmem_pci_device pci_devs[RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS]; + uint32_t pci_devs_idx; +}; +static struct ivshmem_shared_config * ivshmem_config; +static int memseg_idx; +static int pagesz; + +/* Tailq heads to add rings to */ +TAILQ_HEAD(rte_ring_list, rte_tailq_entry); + +/* + * Utility functions + */ + +static int +is_ivshmem_device(struct rte_pci_device * dev) +{ + return dev->id.vendor_id == PCI_VENDOR_ID_IVSHMEM + && dev->id.device_id == PCI_DEVICE_ID_IVSHMEM; +} + +static void * +map_metadata(int fd, uint64_t len) +{ + size_t metadata_len = sizeof(struct rte_ivshmem_metadata); + size_t aligned_len = METADATA_SIZE_ALIGNED; + + return mmap(NULL, metadata_len, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, len - aligned_len); +} + +static void +unmap_metadata(void * ptr) +{ + munmap(ptr, sizeof(struct rte_ivshmem_metadata)); +} + +static int +has_ivshmem_metadata(int fd, uint64_t len) +{ + struct rte_ivshmem_metadata metadata; + void * ptr; + + ptr = map_metadata(fd, len); + + if (ptr == MAP_FAILED) + return -1; + + metadata = *(struct rte_ivshmem_metadata*) (ptr); + + unmap_metadata(ptr); + + return metadata.magic_number == IVSHMEM_MAGIC; +} + +static void +remove_segment(struct ivshmem_segment * ms, int len, int idx) +{ + int i; + + for (i = idx; i < len - 1; i++) + memcpy(&ms[i], &ms[i+1], sizeof(struct ivshmem_segment)); + memset(&ms[len-1], 0, sizeof(struct ivshmem_segment)); +} + +static int +overlap(const struct rte_memzone * mz1, const struct rte_memzone * mz2) +{ + uint64_t start1, end1, start2, end2; + uint64_t p_start1, p_end1, p_start2, p_end2; + uint64_t i_start1, i_end1, i_start2, i_end2; + int result = 0; + + /* gather virtual addresses */ + start1 = mz1->addr_64; + end1 = mz1->addr_64 + mz1->len; + start2 = mz2->addr_64; + end2 = mz2->addr_64 + mz2->len; + + /* gather physical addresses */ + p_start1 = mz1->phys_addr; + p_end1 = mz1->phys_addr + mz1->len; + p_start2 = mz2->phys_addr; + p_end2 = mz2->phys_addr + mz2->len; + + /* gather ioremap addresses */ + i_start1 = mz1->ioremap_addr; + i_end1 = mz1->ioremap_addr + mz1->len; + i_start2 = mz2->ioremap_addr; + i_end2 = mz2->ioremap_addr + mz2->len; + + /* check for overlap in virtual addresses */ + if (start1 > start2 && start1 < end2) + result |= VIRT; + if (start2 >= start1 && start2 < end1) + result |= VIRT; + + /* check for overlap in physical addresses */ + if (p_start1 > p_start2 && p_start1 < p_end2) + result |= PHYS; + if (p_start2 > p_start1 && p_start2 < p_end1) + result |= PHYS; + + /* check for overlap in ioremap addresses */ + if (i_start1 > i_start2 && i_start1 < i_end2) + result |= IOREMAP; + if (i_start2 > i_start1 && i_start2 < i_end1) + result |= IOREMAP; + + return result; +} + +static int +adjacent(const struct rte_memzone * mz1, const struct rte_memzone * mz2) +{ + uint64_t start1, end1, start2, end2; + uint64_t p_start1, p_end1, p_start2, p_end2; + uint64_t i_start1, i_end1, i_start2, i_end2; + int result = 0; + + /* gather virtual addresses */ + start1 = mz1->addr_64; + end1 = mz1->addr_64 + mz1->len; + start2 = mz2->addr_64; + end2 = mz2->addr_64 + mz2->len; + + /* gather physical addresses */ + p_start1 = mz1->phys_addr; + p_end1 = mz1->phys_addr + mz1->len; + p_start2 = mz2->phys_addr; + p_end2 = mz2->phys_addr + mz2->len; + + /* gather ioremap addresses */ + i_start1 = mz1->ioremap_addr; + i_end1 = mz1->ioremap_addr + mz1->len; + i_start2 = mz2->ioremap_addr; + i_end2 = mz2->ioremap_addr + mz2->len; + + /* check if segments are virtually adjacent */ + if (start1 == end2) + result |= VIRT; + if (start2 == end1) + result |= VIRT; + + /* check if segments are physically adjacent */ + if (p_start1 == p_end2) + result |= PHYS; + if (p_start2 == p_end1) + result |= PHYS; + + /* check if segments are ioremap-adjacent */ + if (i_start1 == i_end2) + result |= IOREMAP; + if (i_start2 == i_end1) + result |= IOREMAP; + + return result; +} + +static int +has_adjacent_segments(struct ivshmem_segment * ms, int len) +{ + int i, j; + + for (i = 0; i < len; i++) + for (j = i + 1; j < len; j++) { + /* we're only interested in fully adjacent segments; partially + * adjacent segments can coexist. + */ + if (adjacent(&ms[i].entry.mz, &ms[j].entry.mz) == FULL) + return 1; + } + return 0; +} + +static int +has_overlapping_segments(struct ivshmem_segment * ms, int len) +{ + int i, j; + + for (i = 0; i < len; i++) + for (j = i + 1; j < len; j++) + if (overlap(&ms[i].entry.mz, &ms[j].entry.mz)) + return 1; + return 0; +} + +static int +seg_compare(const void * a, const void * b) +{ + const struct ivshmem_segment * s1 = (const struct ivshmem_segment*) a; + const struct ivshmem_segment * s2 = (const struct ivshmem_segment*) b; + + /* move unallocated zones to the end */ + if (s1->entry.mz.addr == NULL && s2->entry.mz.addr == NULL) + return 0; + if (s1->entry.mz.addr == 0) + return 1; + if (s2->entry.mz.addr == 0) + return -1; + + return s1->entry.mz.phys_addr > s2->entry.mz.phys_addr; +} + +#ifdef RTE_LIBRTE_IVSHMEM_DEBUG +static void +entry_dump(struct rte_ivshmem_metadata_entry *e) +{ + RTE_LOG(DEBUG, EAL, "\tvirt: %p-%p\n", e->mz.addr, + RTE_PTR_ADD(e->mz.addr, e->mz.len)); + RTE_LOG(DEBUG, EAL, "\tphys: 0x%" PRIx64 "-0x%" PRIx64 "\n", + e->mz.phys_addr, + e->mz.phys_addr + e->mz.len); + RTE_LOG(DEBUG, EAL, "\tio: 0x%" PRIx64 "-0x%" PRIx64 "\n", + e->mz.ioremap_addr, + e->mz.ioremap_addr + e->mz.len); + RTE_LOG(DEBUG, EAL, "\tlen: 0x%" PRIx64 "\n", e->mz.len); + RTE_LOG(DEBUG, EAL, "\toff: 0x%" PRIx64 "\n", e->offset); +} +#endif + + + +/* + * Actual useful code + */ + +/* read through metadata mapped from the IVSHMEM device */ +static int +read_metadata(char * path, int path_len, int fd, uint64_t flen) +{ + struct rte_ivshmem_metadata metadata; + struct rte_ivshmem_metadata_entry * entry; + int idx, i; + void * ptr; + + ptr = map_metadata(fd, flen); + + if (ptr == MAP_FAILED) + return -1; + + metadata = *(struct rte_ivshmem_metadata*) (ptr); + + unmap_metadata(ptr); + + RTE_LOG(DEBUG, EAL, "Parsing metadata for \"%s\"\n", metadata.name); + + idx = ivshmem_config->segment_idx; + + for (i = 0; i < RTE_LIBRTE_IVSHMEM_MAX_ENTRIES && + idx <= RTE_MAX_MEMSEG; i++) { + + if (idx == RTE_MAX_MEMSEG) { + RTE_LOG(ERR, EAL, "Not enough memory segments!\n"); + return -1; + } + + entry = &metadata.entry[i]; + + /* stop on uninitialized memzone */ + if (entry->mz.len == 0) + break; + + /* copy metadata entry */ + memcpy(&ivshmem_config->segment[idx].entry, entry, + sizeof(struct rte_ivshmem_metadata_entry)); + + /* copy path */ + snprintf(ivshmem_config->segment[idx].path, path_len, "%s", path); + + idx++; + } + ivshmem_config->segment_idx = idx; + + return 0; +} + +/* check through each segment and look for adjacent or overlapping ones. */ +static int +cleanup_segments(struct ivshmem_segment * ms, int tbl_len) +{ + struct ivshmem_segment * s, * tmp; + int i, j, concat, seg_adjacent, seg_overlapping; + uint64_t start1, start2, end1, end2, p_start1, p_start2, i_start1, i_start2; + + qsort(ms, tbl_len, sizeof(struct ivshmem_segment), + seg_compare); + + while (has_overlapping_segments(ms, tbl_len) || + has_adjacent_segments(ms, tbl_len)) { + + for (i = 0; i < tbl_len; i++) { + s = &ms[i]; + + concat = 0; + + for (j = i + 1; j < tbl_len; j++) { + tmp = &ms[j]; + + /* check if this segment is overlapping with existing segment, + * or is adjacent to existing segment */ + seg_overlapping = overlap(&s->entry.mz, &tmp->entry.mz); + seg_adjacent = adjacent(&s->entry.mz, &tmp->entry.mz); + + /* check if segments fully overlap or are fully adjacent */ + if ((seg_adjacent == FULL) || (seg_overlapping == FULL)) { + +#ifdef RTE_LIBRTE_IVSHMEM_DEBUG + RTE_LOG(DEBUG, EAL, "Concatenating segments\n"); + RTE_LOG(DEBUG, EAL, "Segment %i:\n", i); + entry_dump(&s->entry); + RTE_LOG(DEBUG, EAL, "Segment %i:\n", j); + entry_dump(&tmp->entry); +#endif + + start1 = s->entry.mz.addr_64; + start2 = tmp->entry.mz.addr_64; + p_start1 = s->entry.mz.phys_addr; + p_start2 = tmp->entry.mz.phys_addr; + i_start1 = s->entry.mz.ioremap_addr; + i_start2 = tmp->entry.mz.ioremap_addr; + end1 = s->entry.mz.addr_64 + s->entry.mz.len; + end2 = tmp->entry.mz.addr_64 + tmp->entry.mz.len; + + /* settle for minimum start address and maximum length */ + s->entry.mz.addr_64 = RTE_MIN(start1, start2); + s->entry.mz.phys_addr = RTE_MIN(p_start1, p_start2); + s->entry.mz.ioremap_addr = RTE_MIN(i_start1, i_start2); + s->entry.offset = RTE_MIN(s->entry.offset, tmp->entry.offset); + s->entry.mz.len = RTE_MAX(end1, end2) - s->entry.mz.addr_64; + concat = 1; + +#ifdef RTE_LIBRTE_IVSHMEM_DEBUG + RTE_LOG(DEBUG, EAL, "Resulting segment:\n"); + entry_dump(&s->entry); + +#endif + } + /* if segments not fully overlap, we have an error condition. + * adjacent segments can coexist. + */ + else if (seg_overlapping > 0) { + RTE_LOG(ERR, EAL, "Segments %i and %i overlap!\n", i, j); +#ifdef RTE_LIBRTE_IVSHMEM_DEBUG + RTE_LOG(DEBUG, EAL, "Segment %i:\n", i); + entry_dump(&s->entry); + RTE_LOG(DEBUG, EAL, "Segment %i:\n", j); + entry_dump(&tmp->entry); +#endif + return -1; + } + if (concat) + break; + } + /* if we concatenated, remove segment at j */ + if (concat) { + remove_segment(ms, tbl_len, j); + tbl_len--; + break; + } + } + } + + return tbl_len; +} + +static int +create_shared_config(void) +{ + char path[PATH_MAX]; + int fd; + + /* build ivshmem config file path */ + snprintf(path, sizeof(path), IVSHMEM_CONFIG_PATH, + internal_config.hugefile_prefix); + + fd = open(path, O_CREAT | O_RDWR, 0600); + + if (fd < 0) { + RTE_LOG(ERR, EAL, "Could not open %s: %s\n", path, strerror(errno)); + return -1; + } + + /* try ex-locking first - if the file is locked, we have a problem */ + if (flock(fd, LOCK_EX | LOCK_NB) == -1) { + RTE_LOG(ERR, EAL, "Locking %s failed: %s\n", path, strerror(errno)); + close(fd); + return -1; + } + + if (ftruncate(fd, sizeof(struct ivshmem_shared_config)) < 0) { + RTE_LOG(ERR, EAL, "ftruncate failed: %s\n", strerror(errno)); + return -1; + } + + ivshmem_config = mmap(NULL, sizeof(struct ivshmem_shared_config), + PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + + if (ivshmem_config == MAP_FAILED) + return -1; + + memset(ivshmem_config, 0, sizeof(struct ivshmem_shared_config)); + + /* change the exclusive lock we got earlier to a shared lock */ + if (flock(fd, LOCK_SH | LOCK_NB) == -1) { + RTE_LOG(ERR, EAL, "Locking %s failed: %s \n", path, strerror(errno)); + return -1; + } + + close(fd); + + return 0; +} + +/* open shared config file and, if present, map the config. + * having no config file is not an error condition, as we later check if + * ivshmem_config is NULL (if it is, that means nothing was mapped). */ +static int +open_shared_config(void) +{ + char path[PATH_MAX]; + int fd; + + /* build ivshmem config file path */ + snprintf(path, sizeof(path), IVSHMEM_CONFIG_PATH, + internal_config.hugefile_prefix); + + fd = open(path, O_RDONLY); + + /* if the file doesn't exist, just return success */ + if (fd < 0 && errno == ENOENT) + return 0; + /* else we have an error condition */ + else if (fd < 0) { + RTE_LOG(ERR, EAL, "Could not open %s: %s\n", + path, strerror(errno)); + return -1; + } + + /* try ex-locking first - if the lock *does* succeed, this means it's a + * stray config file, so it should be deleted. + */ + if (flock(fd, LOCK_EX | LOCK_NB) != -1) { + + /* if we can't remove the file, something is wrong */ + if (unlink(path) < 0) { + RTE_LOG(ERR, EAL, "Could not remove %s: %s\n", path, + strerror(errno)); + return -1; + } + + /* release the lock */ + flock(fd, LOCK_UN); + close(fd); + + /* return success as having a stray config file is equivalent to not + * having config file at all. + */ + return 0; + } + + ivshmem_config = mmap(NULL, sizeof(struct ivshmem_shared_config), + PROT_READ, MAP_SHARED, fd, 0); + + if (ivshmem_config == MAP_FAILED) + return -1; + + /* place a shared lock on config file */ + if (flock(fd, LOCK_SH | LOCK_NB) == -1) { + RTE_LOG(ERR, EAL, "Locking %s failed: %s \n", path, strerror(errno)); + return -1; + } + + close(fd); + + return 0; +} + +/* + * This function does the following: + * + * 1) Builds a table of ivshmem_segments with proper offset alignment + * 2) Cleans up that table so that we don't have any overlapping or adjacent + * memory segments + * 3) Creates memsegs from this table and maps them into memory. + */ +static inline int +map_all_segments(void) +{ + struct ivshmem_segment ms_tbl[RTE_MAX_MEMSEG]; + struct ivshmem_pci_device * pci_dev; + struct rte_mem_config * mcfg; + struct ivshmem_segment * seg; + int fd, fd_zero; + unsigned i, j; + struct rte_memzone mz; + struct rte_memseg ms; + void * base_addr; + uint64_t align, len; + phys_addr_t ioremap_addr; + + ioremap_addr = 0; + + memset(ms_tbl, 0, sizeof(ms_tbl)); + memset(&mz, 0, sizeof(struct rte_memzone)); + memset(&ms, 0, sizeof(struct rte_memseg)); + + /* first, build a table of memsegs to map, to avoid failed mmaps due to + * overlaps + */ + for (i = 0; i < ivshmem_config->segment_idx && i <= RTE_MAX_MEMSEG; i++) { + if (i == RTE_MAX_MEMSEG) { + RTE_LOG(ERR, EAL, "Too many segments requested!\n"); + return -1; + } + + seg = &ivshmem_config->segment[i]; + + /* copy segment to table */ + memcpy(&ms_tbl[i], seg, sizeof(struct ivshmem_segment)); + + /* find ioremap addr */ + for (j = 0; j < DIM(ivshmem_config->pci_devs); j++) { + pci_dev = &ivshmem_config->pci_devs[j]; + if (!strncmp(pci_dev->path, seg->path, sizeof(pci_dev->path))) { + ioremap_addr = pci_dev->ioremap_addr; + break; + } + } + if (ioremap_addr == 0) { + RTE_LOG(ERR, EAL, "Cannot find ioremap addr!\n"); + return -1; + } + + /* work out alignments */ + align = seg->entry.mz.addr_64 - + RTE_ALIGN_FLOOR(seg->entry.mz.addr_64, 0x1000); + len = RTE_ALIGN_CEIL(seg->entry.mz.len + align, 0x1000); + + /* save original alignments */ + ms_tbl[i].align = align; + + /* create a memory zone */ + mz.addr_64 = seg->entry.mz.addr_64 - align; + mz.len = len; + mz.hugepage_sz = seg->entry.mz.hugepage_sz; + mz.phys_addr = seg->entry.mz.phys_addr - align; + + /* find true physical address */ + mz.ioremap_addr = ioremap_addr + seg->entry.offset - align; + + ms_tbl[i].entry.offset = seg->entry.offset - align; + + memcpy(&ms_tbl[i].entry.mz, &mz, sizeof(struct rte_memzone)); + } + + /* clean up the segments */ + memseg_idx = cleanup_segments(ms_tbl, ivshmem_config->segment_idx); + + if (memseg_idx < 0) + return -1; + + mcfg = rte_eal_get_configuration()->mem_config; + + fd_zero = open("/dev/zero", O_RDWR); + + if (fd_zero < 0) { + RTE_LOG(ERR, EAL, "Cannot open /dev/zero: %s\n", strerror(errno)); + return -1; + } + + /* create memsegs and put them into DPDK memory */ + for (i = 0; i < (unsigned) memseg_idx; i++) { + + seg = &ms_tbl[i]; + + ms.addr_64 = seg->entry.mz.addr_64; + ms.hugepage_sz = seg->entry.mz.hugepage_sz; + ms.len = seg->entry.mz.len; + ms.nchannel = rte_memory_get_nchannel(); + ms.nrank = rte_memory_get_nrank(); + ms.phys_addr = seg->entry.mz.phys_addr; + ms.ioremap_addr = seg->entry.mz.ioremap_addr; + ms.socket_id = seg->entry.mz.socket_id; + + base_addr = mmap(ms.addr, ms.len, + PROT_READ | PROT_WRITE, MAP_PRIVATE, fd_zero, 0); + + if (base_addr == MAP_FAILED || base_addr != ms.addr) { + RTE_LOG(ERR, EAL, "Cannot map /dev/zero!\n"); + return -1; + } + + fd = open(seg->path, O_RDWR); + + if (fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", seg->path, + strerror(errno)); + return -1; + } + + munmap(ms.addr, ms.len); + + base_addr = mmap(ms.addr, ms.len, + PROT_READ | PROT_WRITE, MAP_SHARED, fd, + seg->entry.offset); + + + if (base_addr == MAP_FAILED || base_addr != ms.addr) { + RTE_LOG(ERR, EAL, "Cannot map segment into memory: " + "expected %p got %p (%s)\n", ms.addr, base_addr, + strerror(errno)); + return -1; + } + + RTE_LOG(DEBUG, EAL, "Memory segment mapped: %p (len %" PRIx64 ") at " + "offset 0x%" PRIx64 "\n", + ms.addr, ms.len, seg->entry.offset); + + /* put the pointers back into their real positions using original + * alignment */ + ms.addr_64 += seg->align; + ms.phys_addr += seg->align; + ms.ioremap_addr += seg->align; + ms.len -= seg->align; + + /* at this point, the rest of DPDK memory is not initialized, so we + * expect memsegs to be empty */ + memcpy(&mcfg->memseg[i], &ms, + sizeof(struct rte_memseg)); + + close(fd); + + RTE_LOG(DEBUG, EAL, "IVSHMEM segment found, size: 0x%lx\n", + ms.len); + } + + return 0; +} + +/* this happens at a later stage, after general EAL memory initialization */ +int +rte_eal_ivshmem_obj_init(void) +{ + struct rte_ring_list* ring_list = NULL; + struct rte_mem_config * mcfg; + struct ivshmem_segment * seg; + struct rte_memzone * mz; + struct rte_ring * r; + struct rte_tailq_entry *te; + unsigned i, ms, idx; + uint64_t offset; + + /* secondary process would not need any object discovery - it'll all + * already be in shared config */ + if (rte_eal_process_type() != RTE_PROC_PRIMARY || ivshmem_config == NULL) + return 0; + + /* check that we have an initialised ring tail queue */ + ring_list = RTE_TAILQ_LOOKUP(RTE_TAILQ_RING_NAME, rte_ring_list); + if (ring_list == NULL) { + RTE_LOG(ERR, EAL, "No rte_ring tailq found!\n"); + return -1; + } + + mcfg = rte_eal_get_configuration()->mem_config; + + /* create memzones */ + for (i = 0; i < ivshmem_config->segment_idx && i <= RTE_MAX_MEMZONE; i++) { + + seg = &ivshmem_config->segment[i]; + + /* add memzone */ + if (mcfg->memzone_cnt == RTE_MAX_MEMZONE) { + RTE_LOG(ERR, EAL, "No more memory zones available!\n"); + return -1; + } + + idx = mcfg->memzone_cnt; + + RTE_LOG(DEBUG, EAL, "Found memzone: '%s' at %p (len 0x%" PRIx64 ")\n", + seg->entry.mz.name, seg->entry.mz.addr, seg->entry.mz.len); + + memcpy(&mcfg->memzone[idx], &seg->entry.mz, + sizeof(struct rte_memzone)); + + /* find ioremap address */ + for (ms = 0; ms <= RTE_MAX_MEMSEG; ms++) { + if (ms == RTE_MAX_MEMSEG) { + RTE_LOG(ERR, EAL, "Physical address of segment not found!\n"); + return -1; + } + if (CONTAINS(mcfg->memseg[ms], mcfg->memzone[idx])) { + offset = mcfg->memzone[idx].addr_64 - + mcfg->memseg[ms].addr_64; + mcfg->memzone[idx].ioremap_addr = mcfg->memseg[ms].ioremap_addr + + offset; + break; + } + } + + mcfg->memzone_cnt++; + } + + rte_rwlock_write_lock(RTE_EAL_TAILQ_RWLOCK); + + /* find rings */ + for (i = 0; i < mcfg->memzone_cnt; i++) { + mz = &mcfg->memzone[i]; + + /* check if memzone has a ring prefix */ + if (strncmp(mz->name, RTE_RING_MZ_PREFIX, + sizeof(RTE_RING_MZ_PREFIX) - 1) != 0) + continue; + + r = (struct rte_ring*) (mz->addr_64); + + te = rte_zmalloc("RING_TAILQ_ENTRY", sizeof(*te), 0); + if (te == NULL) { + RTE_LOG(ERR, EAL, "Cannot allocate ring tailq entry!\n"); + return -1; + } + + te->data = (void *) r; + + TAILQ_INSERT_TAIL(ring_list, te, next); + + RTE_LOG(DEBUG, EAL, "Found ring: '%s' at %p\n", r->name, mz->addr); + } + rte_rwlock_write_unlock(RTE_EAL_TAILQ_RWLOCK); + +#ifdef RTE_LIBRTE_IVSHMEM_DEBUG + rte_memzone_dump(stdout); + rte_ring_list_dump(stdout); +#endif + + return 0; +} + +/* initialize ivshmem structures */ +int rte_eal_ivshmem_init(void) +{ + struct rte_pci_device * dev; + struct rte_pci_resource * res; + int fd, ret; + char path[PATH_MAX]; + + /* initialize everything to 0 */ + memset(path, 0, sizeof(path)); + ivshmem_config = NULL; + + pagesz = getpagesize(); + + RTE_LOG(DEBUG, EAL, "Searching for IVSHMEM devices...\n"); + + if (rte_eal_process_type() == RTE_PROC_SECONDARY) { + + if (open_shared_config() < 0) { + RTE_LOG(ERR, EAL, "Could not open IVSHMEM config!\n"); + return -1; + } + } + else { + + TAILQ_FOREACH(dev, &pci_device_list, next) { + + if (is_ivshmem_device(dev)) { + + /* IVSHMEM memory is always on BAR2 */ + res = &dev->mem_resource[2]; + + /* if we don't have a BAR2 */ + if (res->len == 0) + continue; + + /* construct pci device path */ + snprintf(path, sizeof(path), IVSHMEM_RESOURCE_PATH, + dev->addr.domain, dev->addr.bus, dev->addr.devid, + dev->addr.function); + + /* try to find memseg */ + fd = open(path, O_RDWR); + if (fd < 0) { + RTE_LOG(ERR, EAL, "Could not open %s\n", path); + return -1; + } + + /* check if it's a DPDK IVSHMEM device */ + ret = has_ivshmem_metadata(fd, res->len); + + /* is DPDK device */ + if (ret == 1) { + + /* config file creation is deferred until the first + * DPDK device is found. then, it has to be created + * only once. */ + if (ivshmem_config == NULL && + create_shared_config() < 0) { + RTE_LOG(ERR, EAL, "Could not create IVSHMEM config!\n"); + close(fd); + return -1; + } + + if (read_metadata(path, sizeof(path), fd, res->len) < 0) { + RTE_LOG(ERR, EAL, "Could not read metadata from" + " device %02x:%02x.%x!\n", dev->addr.bus, + dev->addr.devid, dev->addr.function); + close(fd); + return -1; + } + + if (ivshmem_config->pci_devs_idx == RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS) { + RTE_LOG(WARNING, EAL, + "IVSHMEM PCI device limit exceeded. Increase " + "CONFIG_RTE_LIBRTE_IVSHMEM_MAX_PCI_DEVS in " + "your config file.\n"); + break; + } + + RTE_LOG(INFO, EAL, "Found IVSHMEM device %02x:%02x.%x\n", + dev->addr.bus, dev->addr.devid, dev->addr.function); + + ivshmem_config->pci_devs[ivshmem_config->pci_devs_idx].ioremap_addr = res->phys_addr; + snprintf(ivshmem_config->pci_devs[ivshmem_config->pci_devs_idx].path, + sizeof(ivshmem_config->pci_devs[ivshmem_config->pci_devs_idx].path), + "%s", path); + + ivshmem_config->pci_devs_idx++; + } + /* failed to read */ + else if (ret < 0) { + RTE_LOG(ERR, EAL, "Could not read IVSHMEM device: %s\n", + strerror(errno)); + close(fd); + return -1; + } + /* not a DPDK device */ + else + RTE_LOG(DEBUG, EAL, "Skipping non-DPDK IVSHMEM device\n"); + + /* close the BAR fd */ + close(fd); + } + } + } + + /* ivshmem_config is not NULL only if config was created and/or mapped */ + if (ivshmem_config) { + if (map_all_segments() < 0) { + RTE_LOG(ERR, EAL, "Mapping IVSHMEM segments failed!\n"); + return -1; + } + } + else { + RTE_LOG(DEBUG, EAL, "No IVSHMEM configuration found! \n"); + } + + return 0; +} + +#endif diff --git a/lib/librte_eal/linuxapp/eal/eal_lcore.c b/lib/librte_eal/linuxapp/eal/eal_lcore.c new file mode 100644 index 00000000..de5b4260 --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/eal_lcore.c @@ -0,0 +1,110 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "eal_private.h" +#include "eal_filesystem.h" +#include "eal_thread.h" + +#define SYS_CPU_DIR "/sys/devices/system/cpu/cpu%u" +#define CORE_ID_FILE "topology/core_id" +#define NUMA_NODE_PATH "/sys/devices/system/node" + +/* Check if a cpu is present by the presence of the cpu information for it */ +int +eal_cpu_detected(unsigned lcore_id) +{ + char path[PATH_MAX]; + int len = snprintf(path, sizeof(path), SYS_CPU_DIR + "/"CORE_ID_FILE, lcore_id); + if (len <= 0 || (unsigned)len >= sizeof(path)) + return 0; + if (access(path, F_OK) != 0) + return 0; + + return 1; +} + +/* + * Get CPU socket id (NUMA node) for a logical core. + * + * This searches each nodeX directories in /sys for the symlink for the given + * lcore_id and returns the numa node where the lcore is found. If lcore is not + * found on any numa node, returns zero. + */ +unsigned +eal_cpu_socket_id(unsigned lcore_id) +{ + unsigned socket; + + for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) { + char path[PATH_MAX]; + + snprintf(path, sizeof(path), "%s/node%u/cpu%u", NUMA_NODE_PATH, + socket, lcore_id); + if (access(path, F_OK) == 0) + return socket; + } + return 0; +} + +/* Get the cpu core id value from the /sys/.../cpuX core_id value */ +unsigned +eal_cpu_core_id(unsigned lcore_id) +{ + char path[PATH_MAX]; + unsigned long id; + + int len = snprintf(path, sizeof(path), SYS_CPU_DIR "/%s", lcore_id, CORE_ID_FILE); + if (len <= 0 || (unsigned)len >= sizeof(path)) + goto err; + if (eal_parse_sysfs_value(path, &id) != 0) + goto err; + return (unsigned)id; + +err: + RTE_LOG(ERR, EAL, "Error reading core id value from %s " + "for lcore %u - assuming core 0\n", SYS_CPU_DIR, lcore_id); + return 0; +} diff --git a/lib/librte_eal/linuxapp/eal/eal_log.c b/lib/librte_eal/linuxapp/eal/eal_log.c new file mode 100644 index 00000000..0b133c3e --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/eal_log.c @@ -0,0 +1,146 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "eal_private.h" + +/* + * default log function, used once mempool (hence log history) is + * available + */ +static ssize_t +console_log_write(__attribute__((unused)) void *c, const char *buf, size_t size) +{ + char copybuf[BUFSIZ + 1]; + ssize_t ret; + uint32_t loglevel; + + /* add this log in history */ + rte_log_add_in_history(buf, size); + + /* write on stdout */ + ret = fwrite(buf, 1, size, stdout); + fflush(stdout); + + /* truncate message if too big (should not happen) */ + if (size > BUFSIZ) + size = BUFSIZ; + + /* Syslog error levels are from 0 to 7, so subtract 1 to convert */ + loglevel = rte_log_cur_msg_loglevel() - 1; + memcpy(copybuf, buf, size); + copybuf[size] = '\0'; + + /* write on syslog too */ + syslog(loglevel, "%s", copybuf); + + return ret; +} + +static cookie_io_functions_t console_log_func = { + .write = console_log_write, +}; + +/* + * set the log to default function, called during eal init process, + * once memzones are available. + */ +int +rte_eal_log_init(const char *id, int facility) +{ + FILE *log_stream; + + log_stream = fopencookie(NULL, "w+", console_log_func); + if (log_stream == NULL) + return -1; + + openlog(id, LOG_NDELAY | LOG_PID, facility); + + if (rte_eal_common_log_init(log_stream) < 0) + return -1; + + return 0; +} + +/* early logs */ + +/* + * early log function, used during boot when mempool (hence log + * history) is not available + */ +static ssize_t +early_log_write(__attribute__((unused)) void *c, const char *buf, size_t size) +{ + ssize_t ret; + ret = fwrite(buf, size, 1, stdout); + fflush(stdout); + if (ret == 0) + return -1; + return ret; +} + +static cookie_io_functions_t early_log_func = { + .write = early_log_write, +}; +static FILE *early_log_stream; + +/* + * init the log library, called by rte_eal_init() to enable early + * logs + */ +int +rte_eal_log_early_init(void) +{ + early_log_stream = fopencookie(NULL, "w+", early_log_func); + if (early_log_stream == NULL) { + printf("Cannot configure early_log_stream\n"); + return -1; + } + rte_openlog_stream(early_log_stream); + return 0; +} diff --git a/lib/librte_eal/linuxapp/eal/eal_memory.c b/lib/librte_eal/linuxapp/eal/eal_memory.c new file mode 100644 index 00000000..5b9132c6 --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/eal_memory.c @@ -0,0 +1,1559 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +/* BSD LICENSE + * + * Copyright(c) 2013 6WIND. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of 6WIND S.A. nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#define _FILE_OFFSET_BITS 64 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "eal_private.h" +#include "eal_internal_cfg.h" +#include "eal_filesystem.h" +#include "eal_hugepages.h" + +#ifdef RTE_LIBRTE_XEN_DOM0 +int rte_xen_dom0_supported(void) +{ + return internal_config.xen_dom0_support; +} +#endif + +/** + * @file + * Huge page mapping under linux + * + * To reserve a big contiguous amount of memory, we use the hugepage + * feature of linux. For that, we need to have hugetlbfs mounted. This + * code will create many files in this directory (one per page) and + * map them in virtual memory. For each page, we will retrieve its + * physical address and remap it in order to have a virtual contiguous + * zone as well as a physical contiguous zone. + */ + +static uint64_t baseaddr_offset; + +static unsigned proc_pagemap_readable; + +#define RANDOMIZE_VA_SPACE_FILE "/proc/sys/kernel/randomize_va_space" + +static void +test_proc_pagemap_readable(void) +{ + int fd = open("/proc/self/pagemap", O_RDONLY); + + if (fd < 0) { + RTE_LOG(ERR, EAL, + "Cannot open /proc/self/pagemap: %s. " + "virt2phys address translation will not work\n", + strerror(errno)); + return; + } + + /* Is readable */ + close(fd); + proc_pagemap_readable = 1; +} + +/* Lock page in physical memory and prevent from swapping. */ +int +rte_mem_lock_page(const void *virt) +{ + unsigned long virtual = (unsigned long)virt; + int page_size = getpagesize(); + unsigned long aligned = (virtual & ~ (page_size - 1)); + return mlock((void*)aligned, page_size); +} + +/* + * Get physical address of any mapped virtual address in the current process. + */ +phys_addr_t +rte_mem_virt2phy(const void *virtaddr) +{ + int fd; + uint64_t page, physaddr; + unsigned long virt_pfn; + int page_size; + off_t offset; + + /* Cannot parse /proc/self/pagemap, no need to log errors everywhere */ + if (!proc_pagemap_readable) + return RTE_BAD_PHYS_ADDR; + + /* standard page size */ + page_size = getpagesize(); + + fd = open("/proc/self/pagemap", O_RDONLY); + if (fd < 0) { + RTE_LOG(ERR, EAL, "%s(): cannot open /proc/self/pagemap: %s\n", + __func__, strerror(errno)); + return RTE_BAD_PHYS_ADDR; + } + + virt_pfn = (unsigned long)virtaddr / page_size; + offset = sizeof(uint64_t) * virt_pfn; + if (lseek(fd, offset, SEEK_SET) == (off_t) -1) { + RTE_LOG(ERR, EAL, "%s(): seek error in /proc/self/pagemap: %s\n", + __func__, strerror(errno)); + close(fd); + return RTE_BAD_PHYS_ADDR; + } + if (read(fd, &page, sizeof(uint64_t)) < 0) { + RTE_LOG(ERR, EAL, "%s(): cannot read /proc/self/pagemap: %s\n", + __func__, strerror(errno)); + close(fd); + return RTE_BAD_PHYS_ADDR; + } + + /* + * the pfn (page frame number) are bits 0-54 (see + * pagemap.txt in linux Documentation) + */ + physaddr = ((page & 0x7fffffffffffffULL) * page_size) + + ((unsigned long)virtaddr % page_size); + close(fd); + return physaddr; +} + +/* + * For each hugepage in hugepg_tbl, fill the physaddr value. We find + * it by browsing the /proc/self/pagemap special file. + */ +static int +find_physaddrs(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) +{ + unsigned i; + phys_addr_t addr; + + for (i = 0; i < hpi->num_pages[0]; i++) { + addr = rte_mem_virt2phy(hugepg_tbl[i].orig_va); + if (addr == RTE_BAD_PHYS_ADDR) + return -1; + hugepg_tbl[i].physaddr = addr; + } + return 0; +} + +/* + * Check whether address-space layout randomization is enabled in + * the kernel. This is important for multi-process as it can prevent + * two processes mapping data to the same virtual address + * Returns: + * 0 - address space randomization disabled + * 1/2 - address space randomization enabled + * negative error code on error + */ +static int +aslr_enabled(void) +{ + char c; + int retval, fd = open(RANDOMIZE_VA_SPACE_FILE, O_RDONLY); + if (fd < 0) + return -errno; + retval = read(fd, &c, 1); + close(fd); + if (retval < 0) + return -errno; + if (retval == 0) + return -EIO; + switch (c) { + case '0' : return 0; + case '1' : return 1; + case '2' : return 2; + default: return -EINVAL; + } +} + +/* + * Try to mmap *size bytes in /dev/zero. If it is successful, return the + * pointer to the mmap'd area and keep *size unmodified. Else, retry + * with a smaller zone: decrease *size by hugepage_sz until it reaches + * 0. In this case, return NULL. Note: this function returns an address + * which is a multiple of hugepage size. + */ +static void * +get_virtual_area(size_t *size, size_t hugepage_sz) +{ + void *addr; + int fd; + long aligned_addr; + + if (internal_config.base_virtaddr != 0) { + addr = (void*) (uintptr_t) (internal_config.base_virtaddr + + baseaddr_offset); + } + else addr = NULL; + + RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zx bytes\n", *size); + + fd = open("/dev/zero", O_RDONLY); + if (fd < 0){ + RTE_LOG(ERR, EAL, "Cannot open /dev/zero\n"); + return NULL; + } + do { + addr = mmap(addr, + (*size) + hugepage_sz, PROT_READ, MAP_PRIVATE, fd, 0); + if (addr == MAP_FAILED) + *size -= hugepage_sz; + } while (addr == MAP_FAILED && *size > 0); + + if (addr == MAP_FAILED) { + close(fd); + RTE_LOG(ERR, EAL, "Cannot get a virtual area: %s\n", + strerror(errno)); + return NULL; + } + + munmap(addr, (*size) + hugepage_sz); + close(fd); + + /* align addr to a huge page size boundary */ + aligned_addr = (long)addr; + aligned_addr += (hugepage_sz - 1); + aligned_addr &= (~(hugepage_sz - 1)); + addr = (void *)(aligned_addr); + + RTE_LOG(DEBUG, EAL, "Virtual area found at %p (size = 0x%zx)\n", + addr, *size); + + /* increment offset */ + baseaddr_offset += *size; + + return addr; +} + +/* + * Mmap all hugepages of hugepage table: it first open a file in + * hugetlbfs, then mmap() hugepage_sz data in it. If orig is set, the + * virtual address is stored in hugepg_tbl[i].orig_va, else it is stored + * in hugepg_tbl[i].final_va. The second mapping (when orig is 0) tries to + * map continguous physical blocks in contiguous virtual blocks. + */ +static int +map_all_hugepages(struct hugepage_file *hugepg_tbl, + struct hugepage_info *hpi, int orig) +{ + int fd; + unsigned i; + void *virtaddr; + void *vma_addr = NULL; + size_t vma_len = 0; + +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + RTE_SET_USED(vma_len); +#endif + + for (i = 0; i < hpi->num_pages[0]; i++) { + uint64_t hugepage_sz = hpi->hugepage_sz; + + if (orig) { + hugepg_tbl[i].file_id = i; + hugepg_tbl[i].size = hugepage_sz; +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + eal_get_hugefile_temp_path(hugepg_tbl[i].filepath, + sizeof(hugepg_tbl[i].filepath), hpi->hugedir, + hugepg_tbl[i].file_id); +#else + eal_get_hugefile_path(hugepg_tbl[i].filepath, + sizeof(hugepg_tbl[i].filepath), hpi->hugedir, + hugepg_tbl[i].file_id); +#endif + hugepg_tbl[i].filepath[sizeof(hugepg_tbl[i].filepath) - 1] = '\0'; + } +#ifndef RTE_ARCH_64 + /* for 32-bit systems, don't remap 1G and 16G pages, just reuse + * original map address as final map address. + */ + else if ((hugepage_sz == RTE_PGSIZE_1G) + || (hugepage_sz == RTE_PGSIZE_16G)) { + hugepg_tbl[i].final_va = hugepg_tbl[i].orig_va; + hugepg_tbl[i].orig_va = NULL; + continue; + } +#endif + +#ifndef RTE_EAL_SINGLE_FILE_SEGMENTS + else if (vma_len == 0) { + unsigned j, num_pages; + + /* reserve a virtual area for next contiguous + * physical block: count the number of + * contiguous physical pages. */ + for (j = i+1; j < hpi->num_pages[0] ; j++) { +#ifdef RTE_ARCH_PPC_64 + /* The physical addresses are sorted in + * descending order on PPC64 */ + if (hugepg_tbl[j].physaddr != + hugepg_tbl[j-1].physaddr - hugepage_sz) + break; +#else + if (hugepg_tbl[j].physaddr != + hugepg_tbl[j-1].physaddr + hugepage_sz) + break; +#endif + } + num_pages = j - i; + vma_len = num_pages * hugepage_sz; + + /* get the biggest virtual memory area up to + * vma_len. If it fails, vma_addr is NULL, so + * let the kernel provide the address. */ + vma_addr = get_virtual_area(&vma_len, hpi->hugepage_sz); + if (vma_addr == NULL) + vma_len = hugepage_sz; + } +#endif + + /* try to create hugepage file */ + fd = open(hugepg_tbl[i].filepath, O_CREAT | O_RDWR, 0755); + if (fd < 0) { + RTE_LOG(ERR, EAL, "%s(): open failed: %s\n", __func__, + strerror(errno)); + return -1; + } + + /* map the segment, and populate page tables, + * the kernel fills this segment with zeros */ + virtaddr = mmap(vma_addr, hugepage_sz, PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, fd, 0); + if (virtaddr == MAP_FAILED) { + RTE_LOG(ERR, EAL, "%s(): mmap failed: %s\n", __func__, + strerror(errno)); + close(fd); + return -1; + } + + if (orig) { + hugepg_tbl[i].orig_va = virtaddr; + } + else { + hugepg_tbl[i].final_va = virtaddr; + } + + /* set shared flock on the file. */ + if (flock(fd, LOCK_SH | LOCK_NB) == -1) { + RTE_LOG(ERR, EAL, "%s(): Locking file failed:%s \n", + __func__, strerror(errno)); + close(fd); + return -1; + } + + close(fd); + + vma_addr = (char *)vma_addr + hugepage_sz; + vma_len -= hugepage_sz; + } + return 0; +} + +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + +/* + * Remaps all hugepages into single file segments + */ +static int +remap_all_hugepages(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) +{ + int fd; + unsigned i = 0, j, num_pages, page_idx = 0; + void *vma_addr = NULL, *old_addr = NULL, *page_addr = NULL; + size_t vma_len = 0; + size_t hugepage_sz = hpi->hugepage_sz; + size_t total_size, offset; + char filepath[MAX_HUGEPAGE_PATH]; + phys_addr_t physaddr; + int socket; + + while (i < hpi->num_pages[0]) { + +#ifndef RTE_ARCH_64 + /* for 32-bit systems, don't remap 1G pages and 16G pages, + * just reuse original map address as final map address. + */ + if ((hugepage_sz == RTE_PGSIZE_1G) + || (hugepage_sz == RTE_PGSIZE_16G)) { + hugepg_tbl[i].final_va = hugepg_tbl[i].orig_va; + hugepg_tbl[i].orig_va = NULL; + i++; + continue; + } +#endif + + /* reserve a virtual area for next contiguous + * physical block: count the number of + * contiguous physical pages. */ + for (j = i+1; j < hpi->num_pages[0] ; j++) { +#ifdef RTE_ARCH_PPC_64 + /* The physical addresses are sorted in descending + * order on PPC64 */ + if (hugepg_tbl[j].physaddr != + hugepg_tbl[j-1].physaddr - hugepage_sz) + break; +#else + if (hugepg_tbl[j].physaddr != + hugepg_tbl[j-1].physaddr + hugepage_sz) + break; +#endif + } + num_pages = j - i; + vma_len = num_pages * hugepage_sz; + + socket = hugepg_tbl[i].socket_id; + + /* get the biggest virtual memory area up to + * vma_len. If it fails, vma_addr is NULL, so + * let the kernel provide the address. */ + vma_addr = get_virtual_area(&vma_len, hpi->hugepage_sz); + + /* If we can't find a big enough virtual area, work out how many pages + * we are going to get */ + if (vma_addr == NULL) + j = i + 1; + else if (vma_len != num_pages * hugepage_sz) { + num_pages = vma_len / hugepage_sz; + j = i + num_pages; + + } + + hugepg_tbl[page_idx].file_id = page_idx; + eal_get_hugefile_path(filepath, + sizeof(filepath), + hpi->hugedir, + hugepg_tbl[page_idx].file_id); + + /* try to create hugepage file */ + fd = open(filepath, O_CREAT | O_RDWR, 0755); + if (fd < 0) { + RTE_LOG(ERR, EAL, "%s(): open failed: %s\n", __func__, strerror(errno)); + return -1; + } + + total_size = 0; + for (;i < j; i++) { + + /* unmap current segment */ + if (total_size > 0) + munmap(vma_addr, total_size); + + /* unmap original page */ + munmap(hugepg_tbl[i].orig_va, hugepage_sz); + unlink(hugepg_tbl[i].filepath); + + total_size += hugepage_sz; + + old_addr = vma_addr; + + /* map new, bigger segment, and populate page tables, + * the kernel fills this segment with zeros */ + vma_addr = mmap(vma_addr, total_size, + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, 0); + + if (vma_addr == MAP_FAILED || vma_addr != old_addr) { + RTE_LOG(ERR, EAL, "%s(): mmap failed: %s\n", __func__, strerror(errno)); + close(fd); + return -1; + } + } + + /* set shared flock on the file. */ + if (flock(fd, LOCK_SH | LOCK_NB) == -1) { + RTE_LOG(ERR, EAL, "%s(): Locking file failed:%s \n", + __func__, strerror(errno)); + close(fd); + return -1; + } + + snprintf(hugepg_tbl[page_idx].filepath, MAX_HUGEPAGE_PATH, "%s", + filepath); + + physaddr = rte_mem_virt2phy(vma_addr); + + if (physaddr == RTE_BAD_PHYS_ADDR) + return -1; + + hugepg_tbl[page_idx].final_va = vma_addr; + + hugepg_tbl[page_idx].physaddr = physaddr; + + hugepg_tbl[page_idx].repeated = num_pages; + + hugepg_tbl[page_idx].socket_id = socket; + + close(fd); + + /* verify the memory segment - that is, check that every VA corresponds + * to the physical address we expect to see + */ + for (offset = 0; offset < vma_len; offset += hugepage_sz) { + uint64_t expected_physaddr; + + expected_physaddr = hugepg_tbl[page_idx].physaddr + offset; + page_addr = RTE_PTR_ADD(vma_addr, offset); + physaddr = rte_mem_virt2phy(page_addr); + + if (physaddr != expected_physaddr) { + RTE_LOG(ERR, EAL, "Segment sanity check failed: wrong physaddr " + "at %p (offset 0x%" PRIx64 ": 0x%" PRIx64 + " (expected 0x%" PRIx64 ")\n", + page_addr, offset, physaddr, expected_physaddr); + return -1; + } + } + + page_idx++; + } + + /* zero out the rest */ + memset(&hugepg_tbl[page_idx], 0, (hpi->num_pages[0] - page_idx) * sizeof(struct hugepage_file)); + return page_idx; +} +#else/* RTE_EAL_SINGLE_FILE_SEGMENTS=n */ + +/* Unmap all hugepages from original mapping */ +static int +unmap_all_hugepages_orig(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) +{ + unsigned i; + for (i = 0; i < hpi->num_pages[0]; i++) { + if (hugepg_tbl[i].orig_va) { + munmap(hugepg_tbl[i].orig_va, hpi->hugepage_sz); + hugepg_tbl[i].orig_va = NULL; + } + } + return 0; +} +#endif /* RTE_EAL_SINGLE_FILE_SEGMENTS */ + +/* + * Parse /proc/self/numa_maps to get the NUMA socket ID for each huge + * page. + */ +static int +find_numasocket(struct hugepage_file *hugepg_tbl, struct hugepage_info *hpi) +{ + int socket_id; + char *end, *nodestr; + unsigned i, hp_count = 0; + uint64_t virt_addr; + char buf[BUFSIZ]; + char hugedir_str[PATH_MAX]; + FILE *f; + + f = fopen("/proc/self/numa_maps", "r"); + if (f == NULL) { + RTE_LOG(NOTICE, EAL, "cannot open /proc/self/numa_maps," + " consider that all memory is in socket_id 0\n"); + return 0; + } + + snprintf(hugedir_str, sizeof(hugedir_str), + "%s/%s", hpi->hugedir, internal_config.hugefile_prefix); + + /* parse numa map */ + while (fgets(buf, sizeof(buf), f) != NULL) { + + /* ignore non huge page */ + if (strstr(buf, " huge ") == NULL && + strstr(buf, hugedir_str) == NULL) + continue; + + /* get zone addr */ + virt_addr = strtoull(buf, &end, 16); + if (virt_addr == 0 || end == buf) { + RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__); + goto error; + } + + /* get node id (socket id) */ + nodestr = strstr(buf, " N"); + if (nodestr == NULL) { + RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__); + goto error; + } + nodestr += 2; + end = strstr(nodestr, "="); + if (end == NULL) { + RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__); + goto error; + } + end[0] = '\0'; + end = NULL; + + socket_id = strtoul(nodestr, &end, 0); + if ((nodestr[0] == '\0') || (end == NULL) || (*end != '\0')) { + RTE_LOG(ERR, EAL, "%s(): error in numa_maps parsing\n", __func__); + goto error; + } + + /* if we find this page in our mappings, set socket_id */ + for (i = 0; i < hpi->num_pages[0]; i++) { + void *va = (void *)(unsigned long)virt_addr; + if (hugepg_tbl[i].orig_va == va) { + hugepg_tbl[i].socket_id = socket_id; + hp_count++; + } + } + } + + if (hp_count < hpi->num_pages[0]) + goto error; + + fclose(f); + return 0; + +error: + fclose(f); + return -1; +} + +static int +cmp_physaddr(const void *a, const void *b) +{ +#ifndef RTE_ARCH_PPC_64 + const struct hugepage_file *p1 = (const struct hugepage_file *)a; + const struct hugepage_file *p2 = (const struct hugepage_file *)b; +#else + /* PowerPC needs memory sorted in reverse order from x86 */ + const struct hugepage_file *p1 = (const struct hugepage_file *)b; + const struct hugepage_file *p2 = (const struct hugepage_file *)a; +#endif + if (p1->physaddr < p2->physaddr) + return -1; + else if (p1->physaddr > p2->physaddr) + return 1; + else + return 0; +} + +/* + * Uses mmap to create a shared memory area for storage of data + * Used in this file to store the hugepage file map on disk + */ +static void * +create_shared_memory(const char *filename, const size_t mem_size) +{ + void *retval; + int fd = open(filename, O_CREAT | O_RDWR, 0666); + if (fd < 0) + return NULL; + if (ftruncate(fd, mem_size) < 0) { + close(fd); + return NULL; + } + retval = mmap(NULL, mem_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + close(fd); + return retval; +} + +/* + * this copies *active* hugepages from one hugepage table to another. + * destination is typically the shared memory. + */ +static int +copy_hugepages_to_shared_mem(struct hugepage_file * dst, int dest_size, + const struct hugepage_file * src, int src_size) +{ + int src_pos, dst_pos = 0; + + for (src_pos = 0; src_pos < src_size; src_pos++) { + if (src[src_pos].final_va != NULL) { + /* error on overflow attempt */ + if (dst_pos == dest_size) + return -1; + memcpy(&dst[dst_pos], &src[src_pos], sizeof(struct hugepage_file)); + dst_pos++; + } + } + return 0; +} + +static int +unlink_hugepage_files(struct hugepage_file *hugepg_tbl, + unsigned num_hp_info) +{ + unsigned socket, size; + int page, nrpages = 0; + + /* get total number of hugepages */ + for (size = 0; size < num_hp_info; size++) + for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) + nrpages += + internal_config.hugepage_info[size].num_pages[socket]; + + for (page = 0; page < nrpages; page++) { + struct hugepage_file *hp = &hugepg_tbl[page]; + + if (hp->final_va != NULL && unlink(hp->filepath)) { + RTE_LOG(WARNING, EAL, "%s(): Removing %s failed: %s\n", + __func__, hp->filepath, strerror(errno)); + } + } + return 0; +} + +/* + * unmaps hugepages that are not going to be used. since we originally allocate + * ALL hugepages (not just those we need), additional unmapping needs to be done. + */ +static int +unmap_unneeded_hugepages(struct hugepage_file *hugepg_tbl, + struct hugepage_info *hpi, + unsigned num_hp_info) +{ + unsigned socket, size; + int page, nrpages = 0; + + /* get total number of hugepages */ + for (size = 0; size < num_hp_info; size++) + for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) + nrpages += internal_config.hugepage_info[size].num_pages[socket]; + + for (size = 0; size < num_hp_info; size++) { + for (socket = 0; socket < RTE_MAX_NUMA_NODES; socket++) { + unsigned pages_found = 0; + + /* traverse until we have unmapped all the unused pages */ + for (page = 0; page < nrpages; page++) { + struct hugepage_file *hp = &hugepg_tbl[page]; + +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + /* if this page was already cleared */ + if (hp->final_va == NULL) + continue; +#endif + + /* find a page that matches the criteria */ + if ((hp->size == hpi[size].hugepage_sz) && + (hp->socket_id == (int) socket)) { + + /* if we skipped enough pages, unmap the rest */ + if (pages_found == hpi[size].num_pages[socket]) { + uint64_t unmap_len; + +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + unmap_len = hp->size * hp->repeated; +#else + unmap_len = hp->size; +#endif + + /* get start addr and len of the remaining segment */ + munmap(hp->final_va, (size_t) unmap_len); + + hp->final_va = NULL; + if (unlink(hp->filepath) == -1) { + RTE_LOG(ERR, EAL, "%s(): Removing %s failed: %s\n", + __func__, hp->filepath, strerror(errno)); + return -1; + } + } +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + /* else, check how much do we need to map */ + else { + int nr_pg_left = + hpi[size].num_pages[socket] - pages_found; + + /* if we need enough memory to fit into the segment */ + if (hp->repeated <= nr_pg_left) { + pages_found += hp->repeated; + } + /* truncate the segment */ + else { + uint64_t final_size = nr_pg_left * hp->size; + uint64_t seg_size = hp->repeated * hp->size; + + void * unmap_va = RTE_PTR_ADD(hp->final_va, + final_size); + int fd; + + munmap(unmap_va, seg_size - final_size); + + fd = open(hp->filepath, O_RDWR); + if (fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", + hp->filepath, strerror(errno)); + return -1; + } + if (ftruncate(fd, final_size) < 0) { + RTE_LOG(ERR, EAL, "Cannot truncate %s: %s\n", + hp->filepath, strerror(errno)); + return -1; + } + close(fd); + + pages_found += nr_pg_left; + hp->repeated = nr_pg_left; + } + } +#else + /* else, lock the page and skip */ + else + pages_found++; +#endif + + } /* match page */ + } /* foreach page */ + } /* foreach socket */ + } /* foreach pagesize */ + + return 0; +} + +static inline uint64_t +get_socket_mem_size(int socket) +{ + uint64_t size = 0; + unsigned i; + + for (i = 0; i < internal_config.num_hugepage_sizes; i++){ + struct hugepage_info *hpi = &internal_config.hugepage_info[i]; + if (hpi->hugedir != NULL) + size += hpi->hugepage_sz * hpi->num_pages[socket]; + } + + return size; +} + +/* + * This function is a NUMA-aware equivalent of calc_num_pages. + * It takes in the list of hugepage sizes and the + * number of pages thereof, and calculates the best number of + * pages of each size to fulfill the request for ram + */ +static int +calc_num_pages_per_socket(uint64_t * memory, + struct hugepage_info *hp_info, + struct hugepage_info *hp_used, + unsigned num_hp_info) +{ + unsigned socket, j, i = 0; + unsigned requested, available; + int total_num_pages = 0; + uint64_t remaining_mem, cur_mem; + uint64_t total_mem = internal_config.memory; + + if (num_hp_info == 0) + return -1; + + /* if specific memory amounts per socket weren't requested */ + if (internal_config.force_sockets == 0) { + int cpu_per_socket[RTE_MAX_NUMA_NODES]; + size_t default_size, total_size; + unsigned lcore_id; + + /* Compute number of cores per socket */ + memset(cpu_per_socket, 0, sizeof(cpu_per_socket)); + RTE_LCORE_FOREACH(lcore_id) { + cpu_per_socket[rte_lcore_to_socket_id(lcore_id)]++; + } + + /* + * Automatically spread requested memory amongst detected sockets according + * to number of cores from cpu mask present on each socket + */ + total_size = internal_config.memory; + for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_size != 0; socket++) { + + /* Set memory amount per socket */ + default_size = (internal_config.memory * cpu_per_socket[socket]) + / rte_lcore_count(); + + /* Limit to maximum available memory on socket */ + default_size = RTE_MIN(default_size, get_socket_mem_size(socket)); + + /* Update sizes */ + memory[socket] = default_size; + total_size -= default_size; + } + + /* + * If some memory is remaining, try to allocate it by getting all + * available memory from sockets, one after the other + */ + for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_size != 0; socket++) { + /* take whatever is available */ + default_size = RTE_MIN(get_socket_mem_size(socket) - memory[socket], + total_size); + + /* Update sizes */ + memory[socket] += default_size; + total_size -= default_size; + } + } + + for (socket = 0; socket < RTE_MAX_NUMA_NODES && total_mem != 0; socket++) { + /* skips if the memory on specific socket wasn't requested */ + for (i = 0; i < num_hp_info && memory[socket] != 0; i++){ + hp_used[i].hugedir = hp_info[i].hugedir; + hp_used[i].num_pages[socket] = RTE_MIN( + memory[socket] / hp_info[i].hugepage_sz, + hp_info[i].num_pages[socket]); + + cur_mem = hp_used[i].num_pages[socket] * + hp_used[i].hugepage_sz; + + memory[socket] -= cur_mem; + total_mem -= cur_mem; + + total_num_pages += hp_used[i].num_pages[socket]; + + /* check if we have met all memory requests */ + if (memory[socket] == 0) + break; + + /* check if we have any more pages left at this size, if so + * move on to next size */ + if (hp_used[i].num_pages[socket] == hp_info[i].num_pages[socket]) + continue; + /* At this point we know that there are more pages available that are + * bigger than the memory we want, so lets see if we can get enough + * from other page sizes. + */ + remaining_mem = 0; + for (j = i+1; j < num_hp_info; j++) + remaining_mem += hp_info[j].hugepage_sz * + hp_info[j].num_pages[socket]; + + /* is there enough other memory, if not allocate another page and quit */ + if (remaining_mem < memory[socket]){ + cur_mem = RTE_MIN(memory[socket], + hp_info[i].hugepage_sz); + memory[socket] -= cur_mem; + total_mem -= cur_mem; + hp_used[i].num_pages[socket]++; + total_num_pages++; + break; /* we are done with this socket*/ + } + } + /* if we didn't satisfy all memory requirements per socket */ + if (memory[socket] > 0) { + /* to prevent icc errors */ + requested = (unsigned) (internal_config.socket_mem[socket] / + 0x100000); + available = requested - + ((unsigned) (memory[socket] / 0x100000)); + RTE_LOG(ERR, EAL, "Not enough memory available on socket %u! " + "Requested: %uMB, available: %uMB\n", socket, + requested, available); + return -1; + } + } + + /* if we didn't satisfy total memory requirements */ + if (total_mem > 0) { + requested = (unsigned) (internal_config.memory / 0x100000); + available = requested - (unsigned) (total_mem / 0x100000); + RTE_LOG(ERR, EAL, "Not enough memory available! Requested: %uMB," + " available: %uMB\n", requested, available); + return -1; + } + return total_num_pages; +} + +/* + * Prepare physical memory mapping: fill configuration structure with + * these infos, return 0 on success. + * 1. map N huge pages in separate files in hugetlbfs + * 2. find associated physical addr + * 3. find associated NUMA socket ID + * 4. sort all huge pages by physical address + * 5. remap these N huge pages in the correct order + * 6. unmap the first mapping + * 7. fill memsegs in configuration with contiguous zones + */ +int +rte_eal_hugepage_init(void) +{ + struct rte_mem_config *mcfg; + struct hugepage_file *hugepage, *tmp_hp = NULL; + struct hugepage_info used_hp[MAX_HUGEPAGE_SIZES]; + + uint64_t memory[RTE_MAX_NUMA_NODES]; + + unsigned hp_offset; + int i, j, new_memseg; + int nr_hugefiles, nr_hugepages = 0; + void *addr; +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + int new_pages_count[MAX_HUGEPAGE_SIZES]; +#endif + + test_proc_pagemap_readable(); + + memset(used_hp, 0, sizeof(used_hp)); + + /* get pointer to global configuration */ + mcfg = rte_eal_get_configuration()->mem_config; + + /* hugetlbfs can be disabled */ + if (internal_config.no_hugetlbfs) { + addr = mmap(NULL, internal_config.memory, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); + if (addr == MAP_FAILED) { + RTE_LOG(ERR, EAL, "%s: mmap() failed: %s\n", __func__, + strerror(errno)); + return -1; + } + mcfg->memseg[0].phys_addr = (phys_addr_t)(uintptr_t)addr; + mcfg->memseg[0].addr = addr; + mcfg->memseg[0].hugepage_sz = RTE_PGSIZE_4K; + mcfg->memseg[0].len = internal_config.memory; + mcfg->memseg[0].socket_id = 0; + return 0; + } + +/* check if app runs on Xen Dom0 */ + if (internal_config.xen_dom0_support) { +#ifdef RTE_LIBRTE_XEN_DOM0 + /* use dom0_mm kernel driver to init memory */ + if (rte_xen_dom0_memory_init() < 0) + return -1; + else + return 0; +#endif + } + + /* calculate total number of hugepages available. at this point we haven't + * yet started sorting them so they all are on socket 0 */ + for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) { + /* meanwhile, also initialize used_hp hugepage sizes in used_hp */ + used_hp[i].hugepage_sz = internal_config.hugepage_info[i].hugepage_sz; + + nr_hugepages += internal_config.hugepage_info[i].num_pages[0]; + } + + /* + * allocate a memory area for hugepage table. + * this isn't shared memory yet. due to the fact that we need some + * processing done on these pages, shared memory will be created + * at a later stage. + */ + tmp_hp = malloc(nr_hugepages * sizeof(struct hugepage_file)); + if (tmp_hp == NULL) + goto fail; + + memset(tmp_hp, 0, nr_hugepages * sizeof(struct hugepage_file)); + + hp_offset = 0; /* where we start the current page size entries */ + + /* map all hugepages and sort them */ + for (i = 0; i < (int)internal_config.num_hugepage_sizes; i ++){ + struct hugepage_info *hpi; + + /* + * we don't yet mark hugepages as used at this stage, so + * we just map all hugepages available to the system + * all hugepages are still located on socket 0 + */ + hpi = &internal_config.hugepage_info[i]; + + if (hpi->num_pages[0] == 0) + continue; + + /* map all hugepages available */ + if (map_all_hugepages(&tmp_hp[hp_offset], hpi, 1) < 0){ + RTE_LOG(DEBUG, EAL, "Failed to mmap %u MB hugepages\n", + (unsigned)(hpi->hugepage_sz / 0x100000)); + goto fail; + } + + /* find physical addresses and sockets for each hugepage */ + if (find_physaddrs(&tmp_hp[hp_offset], hpi) < 0){ + RTE_LOG(DEBUG, EAL, "Failed to find phys addr for %u MB pages\n", + (unsigned)(hpi->hugepage_sz / 0x100000)); + goto fail; + } + + if (find_numasocket(&tmp_hp[hp_offset], hpi) < 0){ + RTE_LOG(DEBUG, EAL, "Failed to find NUMA socket for %u MB pages\n", + (unsigned)(hpi->hugepage_sz / 0x100000)); + goto fail; + } + + qsort(&tmp_hp[hp_offset], hpi->num_pages[0], + sizeof(struct hugepage_file), cmp_physaddr); + +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + /* remap all hugepages into single file segments */ + new_pages_count[i] = remap_all_hugepages(&tmp_hp[hp_offset], hpi); + if (new_pages_count[i] < 0){ + RTE_LOG(DEBUG, EAL, "Failed to remap %u MB pages\n", + (unsigned)(hpi->hugepage_sz / 0x100000)); + goto fail; + } + + /* we have processed a num of hugepages of this size, so inc offset */ + hp_offset += new_pages_count[i]; +#else + /* remap all hugepages */ + if (map_all_hugepages(&tmp_hp[hp_offset], hpi, 0) < 0){ + RTE_LOG(DEBUG, EAL, "Failed to remap %u MB pages\n", + (unsigned)(hpi->hugepage_sz / 0x100000)); + goto fail; + } + + /* unmap original mappings */ + if (unmap_all_hugepages_orig(&tmp_hp[hp_offset], hpi) < 0) + goto fail; + + /* we have processed a num of hugepages of this size, so inc offset */ + hp_offset += hpi->num_pages[0]; +#endif + } + +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + nr_hugefiles = 0; + for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) { + nr_hugefiles += new_pages_count[i]; + } +#else + nr_hugefiles = nr_hugepages; +#endif + + + /* clean out the numbers of pages */ + for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) + for (j = 0; j < RTE_MAX_NUMA_NODES; j++) + internal_config.hugepage_info[i].num_pages[j] = 0; + + /* get hugepages for each socket */ + for (i = 0; i < nr_hugefiles; i++) { + int socket = tmp_hp[i].socket_id; + + /* find a hugepage info with right size and increment num_pages */ + const int nb_hpsizes = RTE_MIN(MAX_HUGEPAGE_SIZES, + (int)internal_config.num_hugepage_sizes); + for (j = 0; j < nb_hpsizes; j++) { + if (tmp_hp[i].size == + internal_config.hugepage_info[j].hugepage_sz) { +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + internal_config.hugepage_info[j].num_pages[socket] += + tmp_hp[i].repeated; +#else + internal_config.hugepage_info[j].num_pages[socket]++; +#endif + } + } + } + + /* make a copy of socket_mem, needed for number of pages calculation */ + for (i = 0; i < RTE_MAX_NUMA_NODES; i++) + memory[i] = internal_config.socket_mem[i]; + + /* calculate final number of pages */ + nr_hugepages = calc_num_pages_per_socket(memory, + internal_config.hugepage_info, used_hp, + internal_config.num_hugepage_sizes); + + /* error if not enough memory available */ + if (nr_hugepages < 0) + goto fail; + + /* reporting in! */ + for (i = 0; i < (int) internal_config.num_hugepage_sizes; i++) { + for (j = 0; j < RTE_MAX_NUMA_NODES; j++) { + if (used_hp[i].num_pages[j] > 0) { + RTE_LOG(DEBUG, EAL, + "Requesting %u pages of size %uMB" + " from socket %i\n", + used_hp[i].num_pages[j], + (unsigned) + (used_hp[i].hugepage_sz / 0x100000), + j); + } + } + } + + /* create shared memory */ + hugepage = create_shared_memory(eal_hugepage_info_path(), + nr_hugefiles * sizeof(struct hugepage_file)); + + if (hugepage == NULL) { + RTE_LOG(ERR, EAL, "Failed to create shared memory!\n"); + goto fail; + } + memset(hugepage, 0, nr_hugefiles * sizeof(struct hugepage_file)); + + /* + * unmap pages that we won't need (looks at used_hp). + * also, sets final_va to NULL on pages that were unmapped. + */ + if (unmap_unneeded_hugepages(tmp_hp, used_hp, + internal_config.num_hugepage_sizes) < 0) { + RTE_LOG(ERR, EAL, "Unmapping and locking hugepages failed!\n"); + goto fail; + } + + /* + * copy stuff from malloc'd hugepage* to the actual shared memory. + * this procedure only copies those hugepages that have final_va + * not NULL. has overflow protection. + */ + if (copy_hugepages_to_shared_mem(hugepage, nr_hugefiles, + tmp_hp, nr_hugefiles) < 0) { + RTE_LOG(ERR, EAL, "Copying tables to shared memory failed!\n"); + goto fail; + } + + /* free the hugepage backing files */ + if (internal_config.hugepage_unlink && + unlink_hugepage_files(tmp_hp, internal_config.num_hugepage_sizes) < 0) { + RTE_LOG(ERR, EAL, "Unlinking hugepage files failed!\n"); + goto fail; + } + + /* free the temporary hugepage table */ + free(tmp_hp); + tmp_hp = NULL; + + /* find earliest free memseg - this is needed because in case of IVSHMEM, + * segments might have already been initialized */ + for (j = 0; j < RTE_MAX_MEMSEG; j++) + if (mcfg->memseg[j].addr == NULL) { + /* move to previous segment and exit loop */ + j--; + break; + } + + for (i = 0; i < nr_hugefiles; i++) { + new_memseg = 0; + + /* if this is a new section, create a new memseg */ + if (i == 0) + new_memseg = 1; + else if (hugepage[i].socket_id != hugepage[i-1].socket_id) + new_memseg = 1; + else if (hugepage[i].size != hugepage[i-1].size) + new_memseg = 1; + +#ifdef RTE_ARCH_PPC_64 + /* On PPC64 architecture, the mmap always start from higher + * virtual address to lower address. Here, both the physical + * address and virtual address are in descending order */ + else if ((hugepage[i-1].physaddr - hugepage[i].physaddr) != + hugepage[i].size) + new_memseg = 1; + else if (((unsigned long)hugepage[i-1].final_va - + (unsigned long)hugepage[i].final_va) != hugepage[i].size) + new_memseg = 1; +#else + else if ((hugepage[i].physaddr - hugepage[i-1].physaddr) != + hugepage[i].size) + new_memseg = 1; + else if (((unsigned long)hugepage[i].final_va - + (unsigned long)hugepage[i-1].final_va) != hugepage[i].size) + new_memseg = 1; +#endif + + if (new_memseg) { + j += 1; + if (j == RTE_MAX_MEMSEG) + break; + + mcfg->memseg[j].phys_addr = hugepage[i].physaddr; + mcfg->memseg[j].addr = hugepage[i].final_va; +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + mcfg->memseg[j].len = hugepage[i].size * hugepage[i].repeated; +#else + mcfg->memseg[j].len = hugepage[i].size; +#endif + mcfg->memseg[j].socket_id = hugepage[i].socket_id; + mcfg->memseg[j].hugepage_sz = hugepage[i].size; + } + /* continuation of previous memseg */ + else { +#ifdef RTE_ARCH_PPC_64 + /* Use the phy and virt address of the last page as segment + * address for IBM Power architecture */ + mcfg->memseg[j].phys_addr = hugepage[i].physaddr; + mcfg->memseg[j].addr = hugepage[i].final_va; +#endif + mcfg->memseg[j].len += mcfg->memseg[j].hugepage_sz; + } + hugepage[i].memseg_id = j; + } + + if (i < nr_hugefiles) { + RTE_LOG(ERR, EAL, "Can only reserve %d pages " + "from %d requested\n" + "Current %s=%d is not enough\n" + "Please either increase it or request less amount " + "of memory.\n", + i, nr_hugefiles, RTE_STR(CONFIG_RTE_MAX_MEMSEG), + RTE_MAX_MEMSEG); + return -ENOMEM; + } + + return 0; + +fail: + free(tmp_hp); + return -1; +} + +/* + * uses fstat to report the size of a file on disk + */ +static off_t +getFileSize(int fd) +{ + struct stat st; + if (fstat(fd, &st) < 0) + return 0; + return st.st_size; +} + +/* + * This creates the memory mappings in the secondary process to match that of + * the server process. It goes through each memory segment in the DPDK runtime + * configuration and finds the hugepages which form that segment, mapping them + * in order to form a contiguous block in the virtual memory space + */ +int +rte_eal_hugepage_attach(void) +{ + const struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + const struct hugepage_file *hp = NULL; + unsigned num_hp = 0; + unsigned i, s = 0; /* s used to track the segment number */ + off_t size; + int fd, fd_zero = -1, fd_hugepage = -1; + + if (aslr_enabled() > 0) { + RTE_LOG(WARNING, EAL, "WARNING: Address Space Layout Randomization " + "(ASLR) is enabled in the kernel.\n"); + RTE_LOG(WARNING, EAL, " This may cause issues with mapping memory " + "into secondary processes\n"); + } + + test_proc_pagemap_readable(); + + if (internal_config.xen_dom0_support) { +#ifdef RTE_LIBRTE_XEN_DOM0 + if (rte_xen_dom0_memory_attach() < 0) { + RTE_LOG(ERR, EAL,"Failed to attach memory setments of primay " + "process\n"); + return -1; + } + return 0; +#endif + } + + fd_zero = open("/dev/zero", O_RDONLY); + if (fd_zero < 0) { + RTE_LOG(ERR, EAL, "Could not open /dev/zero\n"); + goto error; + } + fd_hugepage = open(eal_hugepage_info_path(), O_RDONLY); + if (fd_hugepage < 0) { + RTE_LOG(ERR, EAL, "Could not open %s\n", eal_hugepage_info_path()); + goto error; + } + + /* map all segments into memory to make sure we get the addrs */ + for (s = 0; s < RTE_MAX_MEMSEG; ++s) { + void *base_addr; + + /* + * the first memory segment with len==0 is the one that + * follows the last valid segment. + */ + if (mcfg->memseg[s].len == 0) + break; + +#ifdef RTE_LIBRTE_IVSHMEM + /* + * if segment has ioremap address set, it's an IVSHMEM segment and + * doesn't need mapping as it was already mapped earlier + */ + if (mcfg->memseg[s].ioremap_addr != 0) + continue; +#endif + + /* + * fdzero is mmapped to get a contiguous block of virtual + * addresses of the appropriate memseg size. + * use mmap to get identical addresses as the primary process. + */ + base_addr = mmap(mcfg->memseg[s].addr, mcfg->memseg[s].len, + PROT_READ, MAP_PRIVATE, fd_zero, 0); + if (base_addr == MAP_FAILED || + base_addr != mcfg->memseg[s].addr) { + RTE_LOG(ERR, EAL, "Could not mmap %llu bytes " + "in /dev/zero to requested address [%p]: '%s'\n", + (unsigned long long)mcfg->memseg[s].len, + mcfg->memseg[s].addr, strerror(errno)); + if (aslr_enabled() > 0) { + RTE_LOG(ERR, EAL, "It is recommended to " + "disable ASLR in the kernel " + "and retry running both primary " + "and secondary processes\n"); + } + goto error; + } + } + + size = getFileSize(fd_hugepage); + hp = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd_hugepage, 0); + if (hp == NULL) { + RTE_LOG(ERR, EAL, "Could not mmap %s\n", eal_hugepage_info_path()); + goto error; + } + + num_hp = size / sizeof(struct hugepage_file); + RTE_LOG(DEBUG, EAL, "Analysing %u files\n", num_hp); + + s = 0; + while (s < RTE_MAX_MEMSEG && mcfg->memseg[s].len > 0){ + void *addr, *base_addr; + uintptr_t offset = 0; + size_t mapping_size; +#ifdef RTE_LIBRTE_IVSHMEM + /* + * if segment has ioremap address set, it's an IVSHMEM segment and + * doesn't need mapping as it was already mapped earlier + */ + if (mcfg->memseg[s].ioremap_addr != 0) { + s++; + continue; + } +#endif + /* + * free previously mapped memory so we can map the + * hugepages into the space + */ + base_addr = mcfg->memseg[s].addr; + munmap(base_addr, mcfg->memseg[s].len); + + /* find the hugepages for this segment and map them + * we don't need to worry about order, as the server sorted the + * entries before it did the second mmap of them */ + for (i = 0; i < num_hp && offset < mcfg->memseg[s].len; i++){ + if (hp[i].memseg_id == (int)s){ + fd = open(hp[i].filepath, O_RDWR); + if (fd < 0) { + RTE_LOG(ERR, EAL, "Could not open %s\n", + hp[i].filepath); + goto error; + } +#ifdef RTE_EAL_SINGLE_FILE_SEGMENTS + mapping_size = hp[i].size * hp[i].repeated; +#else + mapping_size = hp[i].size; +#endif + addr = mmap(RTE_PTR_ADD(base_addr, offset), + mapping_size, PROT_READ | PROT_WRITE, + MAP_SHARED, fd, 0); + close(fd); /* close file both on success and on failure */ + if (addr == MAP_FAILED || + addr != RTE_PTR_ADD(base_addr, offset)) { + RTE_LOG(ERR, EAL, "Could not mmap %s\n", + hp[i].filepath); + goto error; + } + offset+=mapping_size; + } + } + RTE_LOG(DEBUG, EAL, "Mapped segment %u of size 0x%llx\n", s, + (unsigned long long)mcfg->memseg[s].len); + s++; + } + /* unmap the hugepage config file, since we are done using it */ + munmap((void *)(uintptr_t)hp, size); + close(fd_zero); + close(fd_hugepage); + return 0; + +error: + if (fd_zero >= 0) + close(fd_zero); + if (fd_hugepage >= 0) + close(fd_hugepage); + return -1; +} diff --git a/lib/librte_eal/linuxapp/eal/eal_pci.c b/lib/librte_eal/linuxapp/eal/eal_pci.c new file mode 100644 index 00000000..dbf12a84 --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/eal_pci.c @@ -0,0 +1,762 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include + +#include +#include +#include +#include +#include +#include + +#include "eal_filesystem.h" +#include "eal_private.h" +#include "eal_pci_init.h" + +/** + * @file + * PCI probing under linux + * + * This code is used to simulate a PCI probe by parsing information in sysfs. + * When a registered device matches a driver, it is then initialized with + * IGB_UIO driver (or doesn't initialize, if the device wasn't bound to it). + */ + +/* unbind kernel driver for this device */ +int +pci_unbind_kernel_driver(struct rte_pci_device *dev) +{ + int n; + FILE *f; + char filename[PATH_MAX]; + char buf[BUFSIZ]; + struct rte_pci_addr *loc = &dev->addr; + + /* open /sys/bus/pci/devices/AAAA:BB:CC.D/driver */ + snprintf(filename, sizeof(filename), + SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/driver/unbind", + loc->domain, loc->bus, loc->devid, loc->function); + + f = fopen(filename, "w"); + if (f == NULL) /* device was not bound */ + return 0; + + n = snprintf(buf, sizeof(buf), PCI_PRI_FMT "\n", + loc->domain, loc->bus, loc->devid, loc->function); + if ((n < 0) || (n >= (int)sizeof(buf))) { + RTE_LOG(ERR, EAL, "%s(): snprintf failed\n", __func__); + goto error; + } + if (fwrite(buf, n, 1, f) == 0) { + RTE_LOG(ERR, EAL, "%s(): could not write to %s\n", __func__, + filename); + goto error; + } + + fclose(f); + return 0; + +error: + fclose(f); + return -1; +} + +static int +pci_get_kernel_driver_by_path(const char *filename, char *dri_name) +{ + int count; + char path[PATH_MAX]; + char *name; + + if (!filename || !dri_name) + return -1; + + count = readlink(filename, path, PATH_MAX); + if (count >= PATH_MAX) + return -1; + + /* For device does not have a driver */ + if (count < 0) + return 1; + + path[count] = '\0'; + + name = strrchr(path, '/'); + if (name) { + strncpy(dri_name, name + 1, strlen(name + 1) + 1); + return 0; + } + + return -1; +} + +/* Map pci device */ +int +rte_eal_pci_map_device(struct rte_pci_device *dev) +{ + int ret = -1; + + /* try mapping the NIC resources using VFIO if it exists */ + switch (dev->kdrv) { + case RTE_KDRV_VFIO: +#ifdef VFIO_PRESENT + if (pci_vfio_is_enabled()) + ret = pci_vfio_map_resource(dev); +#endif + break; + case RTE_KDRV_IGB_UIO: + case RTE_KDRV_UIO_GENERIC: + /* map resources for devices that use uio */ + ret = pci_uio_map_resource(dev); + break; + default: + RTE_LOG(DEBUG, EAL, + " Not managed by a supported kernel driver, skipped\n"); + ret = 1; + break; + } + + return ret; +} + +/* Unmap pci device */ +void +rte_eal_pci_unmap_device(struct rte_pci_device *dev) +{ + /* try unmapping the NIC resources using VFIO if it exists */ + switch (dev->kdrv) { + case RTE_KDRV_VFIO: + RTE_LOG(ERR, EAL, "Hotplug doesn't support vfio yet\n"); + break; + case RTE_KDRV_IGB_UIO: + case RTE_KDRV_UIO_GENERIC: + /* unmap resources for devices that use uio */ + pci_uio_unmap_resource(dev); + break; + default: + RTE_LOG(DEBUG, EAL, + " Not managed by a supported kernel driver, skipped\n"); + break; + } +} + +void * +pci_find_max_end_va(void) +{ + const struct rte_memseg *seg = rte_eal_get_physmem_layout(); + const struct rte_memseg *last = seg; + unsigned i = 0; + + for (i = 0; i < RTE_MAX_MEMSEG; i++, seg++) { + if (seg->addr == NULL) + break; + + if (seg->addr > last->addr) + last = seg; + + } + return RTE_PTR_ADD(last->addr, last->len); +} + +/* parse the "resource" sysfs file */ +static int +pci_parse_sysfs_resource(const char *filename, struct rte_pci_device *dev) +{ + FILE *f; + char buf[BUFSIZ]; + union pci_resource_info { + struct { + char *phys_addr; + char *end_addr; + char *flags; + }; + char *ptrs[PCI_RESOURCE_FMT_NVAL]; + } res_info; + int i; + uint64_t phys_addr, end_addr, flags; + + f = fopen(filename, "r"); + if (f == NULL) { + RTE_LOG(ERR, EAL, "Cannot open sysfs resource\n"); + return -1; + } + + for (i = 0; imem_resource[i].phys_addr = phys_addr; + dev->mem_resource[i].len = end_addr - phys_addr + 1; + /* not mapped for now */ + dev->mem_resource[i].addr = NULL; + } + } + fclose(f); + return 0; + +error: + fclose(f); + return -1; +} + +/* Scan one pci sysfs entry, and fill the devices list from it. */ +static int +pci_scan_one(const char *dirname, uint16_t domain, uint8_t bus, + uint8_t devid, uint8_t function) +{ + char filename[PATH_MAX]; + unsigned long tmp; + struct rte_pci_device *dev; + char driver[PATH_MAX]; + int ret; + + dev = malloc(sizeof(*dev)); + if (dev == NULL) + return -1; + + memset(dev, 0, sizeof(*dev)); + dev->addr.domain = domain; + dev->addr.bus = bus; + dev->addr.devid = devid; + dev->addr.function = function; + + /* get vendor id */ + snprintf(filename, sizeof(filename), "%s/vendor", dirname); + if (eal_parse_sysfs_value(filename, &tmp) < 0) { + free(dev); + return -1; + } + dev->id.vendor_id = (uint16_t)tmp; + + /* get device id */ + snprintf(filename, sizeof(filename), "%s/device", dirname); + if (eal_parse_sysfs_value(filename, &tmp) < 0) { + free(dev); + return -1; + } + dev->id.device_id = (uint16_t)tmp; + + /* get subsystem_vendor id */ + snprintf(filename, sizeof(filename), "%s/subsystem_vendor", + dirname); + if (eal_parse_sysfs_value(filename, &tmp) < 0) { + free(dev); + return -1; + } + dev->id.subsystem_vendor_id = (uint16_t)tmp; + + /* get subsystem_device id */ + snprintf(filename, sizeof(filename), "%s/subsystem_device", + dirname); + if (eal_parse_sysfs_value(filename, &tmp) < 0) { + free(dev); + return -1; + } + dev->id.subsystem_device_id = (uint16_t)tmp; + + /* get max_vfs */ + dev->max_vfs = 0; + snprintf(filename, sizeof(filename), "%s/max_vfs", dirname); + if (!access(filename, F_OK) && + eal_parse_sysfs_value(filename, &tmp) == 0) + dev->max_vfs = (uint16_t)tmp; + else { + /* for non igb_uio driver, need kernel version >= 3.8 */ + snprintf(filename, sizeof(filename), + "%s/sriov_numvfs", dirname); + if (!access(filename, F_OK) && + eal_parse_sysfs_value(filename, &tmp) == 0) + dev->max_vfs = (uint16_t)tmp; + } + + /* get numa node */ + snprintf(filename, sizeof(filename), "%s/numa_node", + dirname); + if (access(filename, R_OK) != 0) { + /* if no NUMA support, set default to 0 */ + dev->numa_node = 0; + } else { + if (eal_parse_sysfs_value(filename, &tmp) < 0) { + free(dev); + return -1; + } + dev->numa_node = tmp; + } + + /* parse resources */ + snprintf(filename, sizeof(filename), "%s/resource", dirname); + if (pci_parse_sysfs_resource(filename, dev) < 0) { + RTE_LOG(ERR, EAL, "%s(): cannot parse resource\n", __func__); + free(dev); + return -1; + } + + /* parse driver */ + snprintf(filename, sizeof(filename), "%s/driver", dirname); + ret = pci_get_kernel_driver_by_path(filename, driver); + if (ret < 0) { + RTE_LOG(ERR, EAL, "Fail to get kernel driver\n"); + free(dev); + return -1; + } + + if (!ret) { + if (!strcmp(driver, "vfio-pci")) + dev->kdrv = RTE_KDRV_VFIO; + else if (!strcmp(driver, "igb_uio")) + dev->kdrv = RTE_KDRV_IGB_UIO; + else if (!strcmp(driver, "uio_pci_generic")) + dev->kdrv = RTE_KDRV_UIO_GENERIC; + else + dev->kdrv = RTE_KDRV_UNKNOWN; + } else + dev->kdrv = RTE_KDRV_NONE; + + /* device is valid, add in list (sorted) */ + if (TAILQ_EMPTY(&pci_device_list)) { + TAILQ_INSERT_TAIL(&pci_device_list, dev, next); + } else { + struct rte_pci_device *dev2; + int ret; + + TAILQ_FOREACH(dev2, &pci_device_list, next) { + ret = rte_eal_compare_pci_addr(&dev->addr, &dev2->addr); + if (ret > 0) + continue; + + if (ret < 0) { + TAILQ_INSERT_BEFORE(dev2, dev, next); + } else { /* already registered */ + dev2->kdrv = dev->kdrv; + dev2->max_vfs = dev->max_vfs; + memmove(dev2->mem_resource, dev->mem_resource, + sizeof(dev->mem_resource)); + free(dev); + } + return 0; + } + TAILQ_INSERT_TAIL(&pci_device_list, dev, next); + } + + return 0; +} + +/* + * split up a pci address into its constituent parts. + */ +static int +parse_pci_addr_format(const char *buf, int bufsize, uint16_t *domain, + uint8_t *bus, uint8_t *devid, uint8_t *function) +{ + /* first split on ':' */ + union splitaddr { + struct { + char *domain; + char *bus; + char *devid; + char *function; + }; + char *str[PCI_FMT_NVAL]; /* last element-separator is "." not ":" */ + } splitaddr; + + char *buf_copy = strndup(buf, bufsize); + if (buf_copy == NULL) + return -1; + + if (rte_strsplit(buf_copy, bufsize, splitaddr.str, PCI_FMT_NVAL, ':') + != PCI_FMT_NVAL - 1) + goto error; + /* final split is on '.' between devid and function */ + splitaddr.function = strchr(splitaddr.devid,'.'); + if (splitaddr.function == NULL) + goto error; + *splitaddr.function++ = '\0'; + + /* now convert to int values */ + errno = 0; + *domain = (uint16_t)strtoul(splitaddr.domain, NULL, 16); + *bus = (uint8_t)strtoul(splitaddr.bus, NULL, 16); + *devid = (uint8_t)strtoul(splitaddr.devid, NULL, 16); + *function = (uint8_t)strtoul(splitaddr.function, NULL, 10); + if (errno != 0) + goto error; + + free(buf_copy); /* free the copy made with strdup */ + return 0; +error: + free(buf_copy); + return -1; +} + +/* + * Scan the content of the PCI bus, and the devices in the devices + * list + */ +int +rte_eal_pci_scan(void) +{ + struct dirent *e; + DIR *dir; + char dirname[PATH_MAX]; + uint16_t domain; + uint8_t bus, devid, function; + + dir = opendir(SYSFS_PCI_DEVICES); + if (dir == NULL) { + RTE_LOG(ERR, EAL, "%s(): opendir failed: %s\n", + __func__, strerror(errno)); + return -1; + } + + while ((e = readdir(dir)) != NULL) { + if (e->d_name[0] == '.') + continue; + + if (parse_pci_addr_format(e->d_name, sizeof(e->d_name), &domain, + &bus, &devid, &function) != 0) + continue; + + snprintf(dirname, sizeof(dirname), "%s/%s", SYSFS_PCI_DEVICES, + e->d_name); + if (pci_scan_one(dirname, domain, bus, devid, function) < 0) + goto error; + } + closedir(dir); + return 0; + +error: + closedir(dir); + return -1; +} + +#ifdef RTE_PCI_CONFIG +/* + * It is deprecated, all its configurations have been moved into + * each PMD respectively. + */ +void +pci_config_space_set(__rte_unused struct rte_pci_device *dev) +{ + RTE_LOG(DEBUG, EAL, "Nothing here, as it is deprecated\n"); +} +#endif + +/* Read PCI config space. */ +int rte_eal_pci_read_config(const struct rte_pci_device *device, + void *buf, size_t len, off_t offset) +{ + const struct rte_intr_handle *intr_handle = &device->intr_handle; + + switch (intr_handle->type) { + case RTE_INTR_HANDLE_UIO: + case RTE_INTR_HANDLE_UIO_INTX: + return pci_uio_read_config(intr_handle, buf, len, offset); + +#ifdef VFIO_PRESENT + case RTE_INTR_HANDLE_VFIO_MSIX: + case RTE_INTR_HANDLE_VFIO_MSI: + case RTE_INTR_HANDLE_VFIO_LEGACY: + return pci_vfio_read_config(intr_handle, buf, len, offset); +#endif + default: + RTE_LOG(ERR, EAL, + "Unknown handle type of fd %d\n", + intr_handle->fd); + return -1; + } +} + +/* Write PCI config space. */ +int rte_eal_pci_write_config(const struct rte_pci_device *device, + const void *buf, size_t len, off_t offset) +{ + const struct rte_intr_handle *intr_handle = &device->intr_handle; + + switch (intr_handle->type) { + case RTE_INTR_HANDLE_UIO: + case RTE_INTR_HANDLE_UIO_INTX: + return pci_uio_write_config(intr_handle, buf, len, offset); + +#ifdef VFIO_PRESENT + case RTE_INTR_HANDLE_VFIO_MSIX: + case RTE_INTR_HANDLE_VFIO_MSI: + case RTE_INTR_HANDLE_VFIO_LEGACY: + return pci_vfio_write_config(intr_handle, buf, len, offset); +#endif + default: + RTE_LOG(ERR, EAL, + "Unknown handle type of fd %d\n", + intr_handle->fd); + return -1; + } +} + +#if defined(RTE_ARCH_X86) +static int +pci_ioport_map(struct rte_pci_device *dev, int bar __rte_unused, + struct rte_pci_ioport *p) +{ + uint16_t start, end; + FILE *fp; + char *line = NULL; + char pci_id[16]; + int found = 0; + size_t linesz; + + snprintf(pci_id, sizeof(pci_id), PCI_PRI_FMT, + dev->addr.domain, dev->addr.bus, + dev->addr.devid, dev->addr.function); + + fp = fopen("/proc/ioports", "r"); + if (fp == NULL) { + RTE_LOG(ERR, EAL, "%s(): can't open ioports\n", __func__); + return -1; + } + + while (getdelim(&line, &linesz, '\n', fp) > 0) { + char *ptr = line; + char *left; + int n; + + n = strcspn(ptr, ":"); + ptr[n] = 0; + left = &ptr[n + 1]; + + while (*left && isspace(*left)) + left++; + + if (!strncmp(left, pci_id, strlen(pci_id))) { + found = 1; + + while (*ptr && isspace(*ptr)) + ptr++; + + sscanf(ptr, "%04hx-%04hx", &start, &end); + + break; + } + } + + free(line); + fclose(fp); + + if (!found) + return -1; + + dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; + p->base = start; + RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%x\n", start); + + return 0; +} +#endif + +int +rte_eal_pci_ioport_map(struct rte_pci_device *dev, int bar, + struct rte_pci_ioport *p) +{ + int ret = -1; + + switch (dev->kdrv) { +#ifdef VFIO_PRESENT + case RTE_KDRV_VFIO: + if (pci_vfio_is_enabled()) + ret = pci_vfio_ioport_map(dev, bar, p); + break; +#endif + case RTE_KDRV_IGB_UIO: + ret = pci_uio_ioport_map(dev, bar, p); + break; + case RTE_KDRV_UIO_GENERIC: +#if defined(RTE_ARCH_X86) + ret = pci_ioport_map(dev, bar, p); +#else + ret = pci_uio_ioport_map(dev, bar, p); +#endif + break; + case RTE_KDRV_NONE: +#if defined(RTE_ARCH_X86) + ret = pci_ioport_map(dev, bar, p); +#endif + break; + default: + break; + } + + if (!ret) + p->dev = dev; + + return ret; +} + +void +rte_eal_pci_ioport_read(struct rte_pci_ioport *p, + void *data, size_t len, off_t offset) +{ + switch (p->dev->kdrv) { +#ifdef VFIO_PRESENT + case RTE_KDRV_VFIO: + pci_vfio_ioport_read(p, data, len, offset); + break; +#endif + case RTE_KDRV_IGB_UIO: + pci_uio_ioport_read(p, data, len, offset); + break; + case RTE_KDRV_UIO_GENERIC: + pci_uio_ioport_read(p, data, len, offset); + break; + case RTE_KDRV_NONE: +#if defined(RTE_ARCH_X86) + pci_uio_ioport_read(p, data, len, offset); +#endif + break; + default: + break; + } +} + +void +rte_eal_pci_ioport_write(struct rte_pci_ioport *p, + const void *data, size_t len, off_t offset) +{ + switch (p->dev->kdrv) { +#ifdef VFIO_PRESENT + case RTE_KDRV_VFIO: + pci_vfio_ioport_write(p, data, len, offset); + break; +#endif + case RTE_KDRV_IGB_UIO: + pci_uio_ioport_write(p, data, len, offset); + break; + case RTE_KDRV_UIO_GENERIC: + pci_uio_ioport_write(p, data, len, offset); + break; + case RTE_KDRV_NONE: +#if defined(RTE_ARCH_X86) + pci_uio_ioport_write(p, data, len, offset); +#endif + break; + default: + break; + } +} + +int +rte_eal_pci_ioport_unmap(struct rte_pci_ioport *p) +{ + int ret = -1; + + switch (p->dev->kdrv) { +#ifdef VFIO_PRESENT + case RTE_KDRV_VFIO: + if (pci_vfio_is_enabled()) + ret = pci_vfio_ioport_unmap(p); + break; +#endif + case RTE_KDRV_IGB_UIO: + ret = pci_uio_ioport_unmap(p); + break; + case RTE_KDRV_UIO_GENERIC: +#if defined(RTE_ARCH_X86) + ret = 0; +#else + ret = pci_uio_ioport_unmap(p); +#endif + break; + case RTE_KDRV_NONE: +#if defined(RTE_ARCH_X86) + ret = 0; +#endif + break; + default: + break; + } + + return ret; +} + +/* Init the PCI EAL subsystem */ +int +rte_eal_pci_init(void) +{ + TAILQ_INIT(&pci_driver_list); + TAILQ_INIT(&pci_device_list); + + /* for debug purposes, PCI can be disabled */ + if (internal_config.no_pci) + return 0; + + if (rte_eal_pci_scan() < 0) { + RTE_LOG(ERR, EAL, "%s(): Cannot scan PCI bus\n", __func__); + return -1; + } +#ifdef VFIO_PRESENT + pci_vfio_enable(); + + if (pci_vfio_is_enabled()) { + + /* if we are primary process, create a thread to communicate with + * secondary processes. the thread will use a socket to wait for + * requests from secondary process to send open file descriptors, + * because VFIO does not allow multiple open descriptors on a group or + * VFIO container. + */ + if (internal_config.process_type == RTE_PROC_PRIMARY && + pci_vfio_mp_sync_setup() < 0) + return -1; + } +#endif + return 0; +} diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_init.h b/lib/librte_eal/linuxapp/eal/eal_pci_init.h new file mode 100644 index 00000000..7011753d --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/eal_pci_init.h @@ -0,0 +1,127 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef EAL_PCI_INIT_H_ +#define EAL_PCI_INIT_H_ + +#include "eal_vfio.h" + +/* + * Helper function to map PCI resources right after hugepages in virtual memory + */ +extern void *pci_map_addr; +void *pci_find_max_end_va(void); + +int pci_uio_alloc_resource(struct rte_pci_device *dev, + struct mapped_pci_resource **uio_res); +void pci_uio_free_resource(struct rte_pci_device *dev, + struct mapped_pci_resource *uio_res); +int pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx, + struct mapped_pci_resource *uio_res, int map_idx); + +int pci_uio_read_config(const struct rte_intr_handle *intr_handle, + void *buf, size_t len, off_t offs); +int pci_uio_write_config(const struct rte_intr_handle *intr_handle, + const void *buf, size_t len, off_t offs); + +int pci_uio_ioport_map(struct rte_pci_device *dev, int bar, + struct rte_pci_ioport *p); +void pci_uio_ioport_read(struct rte_pci_ioport *p, + void *data, size_t len, off_t offset); +void pci_uio_ioport_write(struct rte_pci_ioport *p, + const void *data, size_t len, off_t offset); +int pci_uio_ioport_unmap(struct rte_pci_ioport *p); + +#ifdef VFIO_PRESENT + +#define VFIO_MAX_GROUPS 64 + +int pci_vfio_enable(void); +int pci_vfio_is_enabled(void); +int pci_vfio_mp_sync_setup(void); + +/* access config space */ +int pci_vfio_read_config(const struct rte_intr_handle *intr_handle, + void *buf, size_t len, off_t offs); +int pci_vfio_write_config(const struct rte_intr_handle *intr_handle, + const void *buf, size_t len, off_t offs); + +int pci_vfio_ioport_map(struct rte_pci_device *dev, int bar, + struct rte_pci_ioport *p); +void pci_vfio_ioport_read(struct rte_pci_ioport *p, + void *data, size_t len, off_t offset); +void pci_vfio_ioport_write(struct rte_pci_ioport *p, + const void *data, size_t len, off_t offset); +int pci_vfio_ioport_unmap(struct rte_pci_ioport *p); + +/* map VFIO resource prototype */ +int pci_vfio_map_resource(struct rte_pci_device *dev); +int pci_vfio_get_group_fd(int iommu_group_fd); +int pci_vfio_get_container_fd(void); + +/* + * Function prototypes for VFIO multiprocess sync functions + */ +int vfio_mp_sync_send_request(int socket, int req); +int vfio_mp_sync_receive_request(int socket); +int vfio_mp_sync_send_fd(int socket, int fd); +int vfio_mp_sync_receive_fd(int socket); +int vfio_mp_sync_connect_to_primary(void); + +/* socket comm protocol definitions */ +#define SOCKET_REQ_CONTAINER 0x100 +#define SOCKET_REQ_GROUP 0x200 +#define SOCKET_OK 0x0 +#define SOCKET_NO_FD 0x1 +#define SOCKET_ERR 0xFF + +/* + * we don't need to store device fd's anywhere since they can be obtained from + * the group fd via an ioctl() call. + */ +struct vfio_group { + int group_no; + int fd; +}; + +struct vfio_config { + int vfio_enabled; + int vfio_container_fd; + int vfio_container_has_dma; + int vfio_group_idx; + struct vfio_group vfio_groups[VFIO_MAX_GROUPS]; +}; + +#endif + +#endif /* EAL_PCI_INIT_H_ */ diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_uio.c b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c new file mode 100644 index 00000000..068694dc --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/eal_pci_uio.c @@ -0,0 +1,491 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#if defined(RTE_ARCH_X86) +#include +#endif + +#include +#include +#include +#include +#include + +#include "eal_filesystem.h" +#include "eal_pci_init.h" + +void *pci_map_addr = NULL; + +#define OFF_MAX ((uint64_t)(off_t)-1) + +int +pci_uio_read_config(const struct rte_intr_handle *intr_handle, + void *buf, size_t len, off_t offset) +{ + return pread(intr_handle->uio_cfg_fd, buf, len, offset); +} + +int +pci_uio_write_config(const struct rte_intr_handle *intr_handle, + const void *buf, size_t len, off_t offset) +{ + return pwrite(intr_handle->uio_cfg_fd, buf, len, offset); +} + +static int +pci_uio_set_bus_master(int dev_fd) +{ + uint16_t reg; + int ret; + + ret = pread(dev_fd, ®, sizeof(reg), PCI_COMMAND); + if (ret != sizeof(reg)) { + RTE_LOG(ERR, EAL, + "Cannot read command from PCI config space!\n"); + return -1; + } + + /* return if bus mastering is already on */ + if (reg & PCI_COMMAND_MASTER) + return 0; + + reg |= PCI_COMMAND_MASTER; + + ret = pwrite(dev_fd, ®, sizeof(reg), PCI_COMMAND); + if (ret != sizeof(reg)) { + RTE_LOG(ERR, EAL, + "Cannot write command to PCI config space!\n"); + return -1; + } + + return 0; +} + +static int +pci_mknod_uio_dev(const char *sysfs_uio_path, unsigned uio_num) +{ + FILE *f; + char filename[PATH_MAX]; + int ret; + unsigned major, minor; + dev_t dev; + + /* get the name of the sysfs file that contains the major and minor + * of the uio device and read its content */ + snprintf(filename, sizeof(filename), "%s/dev", sysfs_uio_path); + + f = fopen(filename, "r"); + if (f == NULL) { + RTE_LOG(ERR, EAL, "%s(): cannot open sysfs to get major:minor\n", + __func__); + return -1; + } + + ret = fscanf(f, "%u:%u", &major, &minor); + if (ret != 2) { + RTE_LOG(ERR, EAL, "%s(): cannot parse sysfs to get major:minor\n", + __func__); + fclose(f); + return -1; + } + fclose(f); + + /* create the char device "mknod /dev/uioX c major minor" */ + snprintf(filename, sizeof(filename), "/dev/uio%u", uio_num); + dev = makedev(major, minor); + ret = mknod(filename, S_IFCHR | S_IRUSR | S_IWUSR, dev); + if (f == NULL) { + RTE_LOG(ERR, EAL, "%s(): mknod() failed %s\n", + __func__, strerror(errno)); + return -1; + } + + return ret; +} + +/* + * Return the uioX char device used for a pci device. On success, return + * the UIO number and fill dstbuf string with the path of the device in + * sysfs. On error, return a negative value. In this case dstbuf is + * invalid. + */ +static int +pci_get_uio_dev(struct rte_pci_device *dev, char *dstbuf, + unsigned int buflen, int create) +{ + struct rte_pci_addr *loc = &dev->addr; + unsigned int uio_num; + struct dirent *e; + DIR *dir; + char dirname[PATH_MAX]; + + /* depending on kernel version, uio can be located in uio/uioX + * or uio:uioX */ + + snprintf(dirname, sizeof(dirname), + SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/uio", + loc->domain, loc->bus, loc->devid, loc->function); + + dir = opendir(dirname); + if (dir == NULL) { + /* retry with the parent directory */ + snprintf(dirname, sizeof(dirname), + SYSFS_PCI_DEVICES "/" PCI_PRI_FMT, + loc->domain, loc->bus, loc->devid, loc->function); + dir = opendir(dirname); + + if (dir == NULL) { + RTE_LOG(ERR, EAL, "Cannot opendir %s\n", dirname); + return -1; + } + } + + /* take the first file starting with "uio" */ + while ((e = readdir(dir)) != NULL) { + /* format could be uio%d ...*/ + int shortprefix_len = sizeof("uio") - 1; + /* ... or uio:uio%d */ + int longprefix_len = sizeof("uio:uio") - 1; + char *endptr; + + if (strncmp(e->d_name, "uio", 3) != 0) + continue; + + /* first try uio%d */ + errno = 0; + uio_num = strtoull(e->d_name + shortprefix_len, &endptr, 10); + if (errno == 0 && endptr != (e->d_name + shortprefix_len)) { + snprintf(dstbuf, buflen, "%s/uio%u", dirname, uio_num); + break; + } + + /* then try uio:uio%d */ + errno = 0; + uio_num = strtoull(e->d_name + longprefix_len, &endptr, 10); + if (errno == 0 && endptr != (e->d_name + longprefix_len)) { + snprintf(dstbuf, buflen, "%s/uio:uio%u", dirname, uio_num); + break; + } + } + closedir(dir); + + /* No uio resource found */ + if (e == NULL) + return -1; + + /* create uio device if we've been asked to */ + if (internal_config.create_uio_dev && create && + pci_mknod_uio_dev(dstbuf, uio_num) < 0) + RTE_LOG(WARNING, EAL, "Cannot create /dev/uio%u\n", uio_num); + + return uio_num; +} + +void +pci_uio_free_resource(struct rte_pci_device *dev, + struct mapped_pci_resource *uio_res) +{ + rte_free(uio_res); + + if (dev->intr_handle.uio_cfg_fd >= 0) { + close(dev->intr_handle.uio_cfg_fd); + dev->intr_handle.uio_cfg_fd = -1; + } + if (dev->intr_handle.fd) { + close(dev->intr_handle.fd); + dev->intr_handle.fd = -1; + dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; + } +} + +int +pci_uio_alloc_resource(struct rte_pci_device *dev, + struct mapped_pci_resource **uio_res) +{ + char dirname[PATH_MAX]; + char cfgname[PATH_MAX]; + char devname[PATH_MAX]; /* contains the /dev/uioX */ + int uio_num; + struct rte_pci_addr *loc; + + loc = &dev->addr; + + /* find uio resource */ + uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 1); + if (uio_num < 0) { + RTE_LOG(WARNING, EAL, " "PCI_PRI_FMT" not managed by UIO driver, " + "skipping\n", loc->domain, loc->bus, loc->devid, loc->function); + return 1; + } + snprintf(devname, sizeof(devname), "/dev/uio%u", uio_num); + + /* save fd if in primary process */ + dev->intr_handle.fd = open(devname, O_RDWR); + if (dev->intr_handle.fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", + devname, strerror(errno)); + goto error; + } + + snprintf(cfgname, sizeof(cfgname), + "/sys/class/uio/uio%u/device/config", uio_num); + dev->intr_handle.uio_cfg_fd = open(cfgname, O_RDWR); + if (dev->intr_handle.uio_cfg_fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", + cfgname, strerror(errno)); + goto error; + } + + if (dev->kdrv == RTE_KDRV_IGB_UIO) + dev->intr_handle.type = RTE_INTR_HANDLE_UIO; + else { + dev->intr_handle.type = RTE_INTR_HANDLE_UIO_INTX; + + /* set bus master that is not done by uio_pci_generic */ + if (pci_uio_set_bus_master(dev->intr_handle.uio_cfg_fd)) { + RTE_LOG(ERR, EAL, "Cannot set up bus mastering!\n"); + goto error; + } + } + + /* allocate the mapping details for secondary processes*/ + *uio_res = rte_zmalloc("UIO_RES", sizeof(**uio_res), 0); + if (*uio_res == NULL) { + RTE_LOG(ERR, EAL, + "%s(): cannot store uio mmap details\n", __func__); + goto error; + } + + snprintf((*uio_res)->path, sizeof((*uio_res)->path), "%s", devname); + memcpy(&(*uio_res)->pci_addr, &dev->addr, sizeof((*uio_res)->pci_addr)); + + return 0; + +error: + pci_uio_free_resource(dev, *uio_res); + return -1; +} + +int +pci_uio_map_resource_by_index(struct rte_pci_device *dev, int res_idx, + struct mapped_pci_resource *uio_res, int map_idx) +{ + int fd; + char devname[PATH_MAX]; /* contains the /dev/uioX */ + void *mapaddr; + struct rte_pci_addr *loc; + struct pci_map *maps; + + loc = &dev->addr; + maps = uio_res->maps; + + /* update devname for mmap */ + snprintf(devname, sizeof(devname), + SYSFS_PCI_DEVICES "/" PCI_PRI_FMT "/resource%d", + loc->domain, loc->bus, loc->devid, + loc->function, res_idx); + + /* allocate memory to keep path */ + maps[map_idx].path = rte_malloc(NULL, strlen(devname) + 1, 0); + if (maps[map_idx].path == NULL) { + RTE_LOG(ERR, EAL, "Cannot allocate memory for path: %s\n", + strerror(errno)); + return -1; + } + + /* + * open resource file, to mmap it + */ + fd = open(devname, O_RDWR); + if (fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", + devname, strerror(errno)); + goto error; + } + + /* try mapping somewhere close to the end of hugepages */ + if (pci_map_addr == NULL) + pci_map_addr = pci_find_max_end_va(); + + mapaddr = pci_map_resource(pci_map_addr, fd, 0, + (size_t)dev->mem_resource[res_idx].len, 0); + close(fd); + if (mapaddr == MAP_FAILED) + goto error; + + pci_map_addr = RTE_PTR_ADD(mapaddr, + (size_t)dev->mem_resource[res_idx].len); + + maps[map_idx].phaddr = dev->mem_resource[res_idx].phys_addr; + maps[map_idx].size = dev->mem_resource[res_idx].len; + maps[map_idx].addr = mapaddr; + maps[map_idx].offset = 0; + strcpy(maps[map_idx].path, devname); + dev->mem_resource[res_idx].addr = mapaddr; + + return 0; + +error: + rte_free(maps[map_idx].path); + return -1; +} + +int +pci_uio_ioport_map(struct rte_pci_device *dev, int bar, + struct rte_pci_ioport *p) +{ +#if defined(RTE_ARCH_X86) + char dirname[PATH_MAX]; + char filename[PATH_MAX]; + int uio_num; + unsigned long start; + + uio_num = pci_get_uio_dev(dev, dirname, sizeof(dirname), 0); + if (uio_num < 0) + return -1; + + /* get portio start */ + snprintf(filename, sizeof(filename), + "%s/portio/port%d/start", dirname, bar); + if (eal_parse_sysfs_value(filename, &start) < 0) { + RTE_LOG(ERR, EAL, "%s(): cannot parse portio start\n", + __func__); + return -1; + } + /* ensure we don't get anything funny here, read/write will cast to + * uin16_t */ + if (start > UINT16_MAX) + return -1; + + /* FIXME only for primary process ? */ + if (dev->intr_handle.type == RTE_INTR_HANDLE_UNKNOWN) { + + snprintf(filename, sizeof(filename), "/dev/uio%u", uio_num); + dev->intr_handle.fd = open(filename, O_RDWR); + if (dev->intr_handle.fd < 0) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", + filename, strerror(errno)); + return -1; + } + dev->intr_handle.type = RTE_INTR_HANDLE_UIO; + } + + RTE_LOG(DEBUG, EAL, "PCI Port IO found start=0x%lx\n", start); + + p->base = start; + return 0; +#else + RTE_SET_USED(dev); + RTE_SET_USED(bar); + RTE_SET_USED(p); + return -1; +#endif +} + +void +pci_uio_ioport_read(struct rte_pci_ioport *p, + void *data, size_t len, off_t offset) +{ +#if defined(RTE_ARCH_X86) + uint8_t *d; + int size; + unsigned short reg = p->base + offset; + + for (d = data; len > 0; d += size, reg += size, len -= size) { + if (len >= 4) { + size = 4; + *(uint32_t *)d = inl(reg); + } else if (len >= 2) { + size = 2; + *(uint16_t *)d = inw(reg); + } else { + size = 1; + *d = inb(reg); + } + } +#else + RTE_SET_USED(p); + RTE_SET_USED(data); + RTE_SET_USED(len); + RTE_SET_USED(offset); +#endif +} + +void +pci_uio_ioport_write(struct rte_pci_ioport *p, + const void *data, size_t len, off_t offset) +{ +#if defined(RTE_ARCH_X86) + const uint8_t *s; + int size; + unsigned short reg = p->base + offset; + + for (s = data; len > 0; s += size, reg += size, len -= size) { + if (len >= 4) { + size = 4; + outl_p(*(const uint32_t *)s, reg); + } else if (len >= 2) { + size = 2; + outw_p(*(const uint16_t *)s, reg); + } else { + size = 1; + outb_p(*s, reg); + } + } +#else + RTE_SET_USED(p); + RTE_SET_USED(data); + RTE_SET_USED(len); + RTE_SET_USED(offset); +#endif +} + +int +pci_uio_ioport_unmap(struct rte_pci_ioport *p) +{ + RTE_SET_USED(p); +#if defined(RTE_ARCH_X86) + /* FIXME close intr fd ? */ + return 0; +#else + return -1; +#endif +} diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c new file mode 100644 index 00000000..10266f8f --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio.c @@ -0,0 +1,1097 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "eal_filesystem.h" +#include "eal_pci_init.h" +#include "eal_vfio.h" + +/** + * @file + * PCI probing under linux (VFIO version) + * + * This code tries to determine if the PCI device is bound to VFIO driver, + * and initialize it (map BARs, set up interrupts) if that's the case. + * + * This file is only compiled if CONFIG_RTE_EAL_VFIO is set to "y". + */ + +#ifdef VFIO_PRESENT + +#define PAGE_SIZE (sysconf(_SC_PAGESIZE)) +#define PAGE_MASK (~(PAGE_SIZE - 1)) + +static struct rte_tailq_elem rte_vfio_tailq = { + .name = "VFIO_RESOURCE_LIST", +}; +EAL_REGISTER_TAILQ(rte_vfio_tailq) + +#define VFIO_DIR "/dev/vfio" +#define VFIO_CONTAINER_PATH "/dev/vfio/vfio" +#define VFIO_GROUP_FMT "/dev/vfio/%u" +#define VFIO_NOIOMMU_GROUP_FMT "/dev/vfio/noiommu-%u" +#define VFIO_GET_REGION_ADDR(x) ((uint64_t) x << 40ULL) +#define VFIO_GET_REGION_IDX(x) (x >> 40) + +/* per-process VFIO config */ +static struct vfio_config vfio_cfg; + +/* DMA mapping function prototype. + * Takes VFIO container fd as a parameter. + * Returns 0 on success, -1 on error. + * */ +typedef int (*vfio_dma_func_t)(int); + +struct vfio_iommu_type { + int type_id; + const char *name; + vfio_dma_func_t dma_map_func; +}; + +static int vfio_type1_dma_map(int); +static int vfio_noiommu_dma_map(int); + +/* IOMMU types we support */ +static const struct vfio_iommu_type iommu_types[] = { + /* x86 IOMMU, otherwise known as type 1 */ + { RTE_VFIO_TYPE1, "Type 1", &vfio_type1_dma_map}, + /* IOMMU-less mode */ + { RTE_VFIO_NOIOMMU, "No-IOMMU", &vfio_noiommu_dma_map}, +}; + +int +vfio_type1_dma_map(int vfio_container_fd) +{ + const struct rte_memseg *ms = rte_eal_get_physmem_layout(); + int i, ret; + + /* map all DPDK segments for DMA. use 1:1 PA to IOVA mapping */ + for (i = 0; i < RTE_MAX_MEMSEG; i++) { + struct vfio_iommu_type1_dma_map dma_map; + + if (ms[i].addr == NULL) + break; + + memset(&dma_map, 0, sizeof(dma_map)); + dma_map.argsz = sizeof(struct vfio_iommu_type1_dma_map); + dma_map.vaddr = ms[i].addr_64; + dma_map.size = ms[i].len; + dma_map.iova = ms[i].phys_addr; + dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; + + ret = ioctl(vfio_container_fd, VFIO_IOMMU_MAP_DMA, &dma_map); + + if (ret) { + RTE_LOG(ERR, EAL, " cannot set up DMA remapping, " + "error %i (%s)\n", errno, strerror(errno)); + return -1; + } + } + + return 0; +} + +int +vfio_noiommu_dma_map(int __rte_unused vfio_container_fd) +{ + /* No-IOMMU mode does not need DMA mapping */ + return 0; +} + +int +pci_vfio_read_config(const struct rte_intr_handle *intr_handle, + void *buf, size_t len, off_t offs) +{ + return pread64(intr_handle->vfio_dev_fd, buf, len, + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs); +} + +int +pci_vfio_write_config(const struct rte_intr_handle *intr_handle, + const void *buf, size_t len, off_t offs) +{ + return pwrite64(intr_handle->vfio_dev_fd, buf, len, + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + offs); +} + +/* get PCI BAR number where MSI-X interrupts are */ +static int +pci_vfio_get_msix_bar(int fd, int *msix_bar, uint32_t *msix_table_offset, + uint32_t *msix_table_size) +{ + int ret; + uint32_t reg; + uint16_t flags; + uint8_t cap_id, cap_offset; + + /* read PCI capability pointer from config space */ + ret = pread64(fd, ®, sizeof(reg), + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + + PCI_CAPABILITY_LIST); + if (ret != sizeof(reg)) { + RTE_LOG(ERR, EAL, "Cannot read capability pointer from PCI " + "config space!\n"); + return -1; + } + + /* we need first byte */ + cap_offset = reg & 0xFF; + + while (cap_offset) { + + /* read PCI capability ID */ + ret = pread64(fd, ®, sizeof(reg), + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + + cap_offset); + if (ret != sizeof(reg)) { + RTE_LOG(ERR, EAL, "Cannot read capability ID from PCI " + "config space!\n"); + return -1; + } + + /* we need first byte */ + cap_id = reg & 0xFF; + + /* if we haven't reached MSI-X, check next capability */ + if (cap_id != PCI_CAP_ID_MSIX) { + ret = pread64(fd, ®, sizeof(reg), + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + + cap_offset); + if (ret != sizeof(reg)) { + RTE_LOG(ERR, EAL, "Cannot read capability pointer from PCI " + "config space!\n"); + return -1; + } + + /* we need second byte */ + cap_offset = (reg & 0xFF00) >> 8; + + continue; + } + /* else, read table offset */ + else { + /* table offset resides in the next 4 bytes */ + ret = pread64(fd, ®, sizeof(reg), + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + + cap_offset + 4); + if (ret != sizeof(reg)) { + RTE_LOG(ERR, EAL, "Cannot read table offset from PCI config " + "space!\n"); + return -1; + } + + ret = pread64(fd, &flags, sizeof(flags), + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + + cap_offset + 2); + if (ret != sizeof(flags)) { + RTE_LOG(ERR, EAL, "Cannot read table flags from PCI config " + "space!\n"); + return -1; + } + + *msix_bar = reg & RTE_PCI_MSIX_TABLE_BIR; + *msix_table_offset = reg & RTE_PCI_MSIX_TABLE_OFFSET; + *msix_table_size = 16 * (1 + (flags & RTE_PCI_MSIX_FLAGS_QSIZE)); + + return 0; + } + } + return 0; +} + +/* set PCI bus mastering */ +static int +pci_vfio_set_bus_master(int dev_fd) +{ + uint16_t reg; + int ret; + + ret = pread64(dev_fd, ®, sizeof(reg), + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + + PCI_COMMAND); + if (ret != sizeof(reg)) { + RTE_LOG(ERR, EAL, "Cannot read command from PCI config space!\n"); + return -1; + } + + /* set the master bit */ + reg |= PCI_COMMAND_MASTER; + + ret = pwrite64(dev_fd, ®, sizeof(reg), + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + + PCI_COMMAND); + + if (ret != sizeof(reg)) { + RTE_LOG(ERR, EAL, "Cannot write command to PCI config space!\n"); + return -1; + } + + return 0; +} + +/* pick IOMMU type. returns a pointer to vfio_iommu_type or NULL for error */ +static const struct vfio_iommu_type * +pci_vfio_set_iommu_type(int vfio_container_fd) { + unsigned idx; + for (idx = 0; idx < RTE_DIM(iommu_types); idx++) { + const struct vfio_iommu_type *t = &iommu_types[idx]; + + int ret = ioctl(vfio_container_fd, VFIO_SET_IOMMU, + t->type_id); + if (!ret) { + RTE_LOG(NOTICE, EAL, " using IOMMU type %d (%s)\n", + t->type_id, t->name); + return t; + } + /* not an error, there may be more supported IOMMU types */ + RTE_LOG(DEBUG, EAL, " set IOMMU type %d (%s) failed, " + "error %i (%s)\n", t->type_id, t->name, errno, + strerror(errno)); + } + /* if we didn't find a suitable IOMMU type, fail */ + return NULL; +} + +/* check if we have any supported extensions */ +static int +pci_vfio_has_supported_extensions(int vfio_container_fd) { + int ret; + unsigned idx, n_extensions = 0; + for (idx = 0; idx < RTE_DIM(iommu_types); idx++) { + const struct vfio_iommu_type *t = &iommu_types[idx]; + + ret = ioctl(vfio_container_fd, VFIO_CHECK_EXTENSION, + t->type_id); + if (ret < 0) { + RTE_LOG(ERR, EAL, " could not get IOMMU type, " + "error %i (%s)\n", errno, + strerror(errno)); + close(vfio_container_fd); + return -1; + } else if (ret == 1) { + /* we found a supported extension */ + n_extensions++; + } + RTE_LOG(DEBUG, EAL, " IOMMU type %d (%s) is %s\n", + t->type_id, t->name, + ret ? "supported" : "not supported"); + } + + /* if we didn't find any supported IOMMU types, fail */ + if (!n_extensions) { + close(vfio_container_fd); + return -1; + } + + return 0; +} + +/* set up interrupt support (but not enable interrupts) */ +static int +pci_vfio_setup_interrupts(struct rte_pci_device *dev, int vfio_dev_fd) +{ + int i, ret, intr_idx; + + /* default to invalid index */ + intr_idx = VFIO_PCI_NUM_IRQS; + + /* get interrupt type from internal config (MSI-X by default, can be + * overriden from the command line + */ + switch (internal_config.vfio_intr_mode) { + case RTE_INTR_MODE_MSIX: + intr_idx = VFIO_PCI_MSIX_IRQ_INDEX; + break; + case RTE_INTR_MODE_MSI: + intr_idx = VFIO_PCI_MSI_IRQ_INDEX; + break; + case RTE_INTR_MODE_LEGACY: + intr_idx = VFIO_PCI_INTX_IRQ_INDEX; + break; + /* don't do anything if we want to automatically determine interrupt type */ + case RTE_INTR_MODE_NONE: + break; + default: + RTE_LOG(ERR, EAL, " unknown default interrupt type!\n"); + return -1; + } + + /* start from MSI-X interrupt type */ + for (i = VFIO_PCI_MSIX_IRQ_INDEX; i >= 0; i--) { + struct vfio_irq_info irq = { .argsz = sizeof(irq) }; + int fd = -1; + + /* skip interrupt modes we don't want */ + if (internal_config.vfio_intr_mode != RTE_INTR_MODE_NONE && + i != intr_idx) + continue; + + irq.index = i; + + ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_IRQ_INFO, &irq); + if (ret < 0) { + RTE_LOG(ERR, EAL, " cannot get IRQ info, " + "error %i (%s)\n", errno, strerror(errno)); + return -1; + } + + /* if this vector cannot be used with eventfd, fail if we explicitly + * specified interrupt type, otherwise continue */ + if ((irq.flags & VFIO_IRQ_INFO_EVENTFD) == 0) { + if (internal_config.vfio_intr_mode != RTE_INTR_MODE_NONE) { + RTE_LOG(ERR, EAL, + " interrupt vector does not support eventfd!\n"); + return -1; + } else + continue; + } + + /* set up an eventfd for interrupts */ + fd = eventfd(0, EFD_NONBLOCK | EFD_CLOEXEC); + if (fd < 0) { + RTE_LOG(ERR, EAL, " cannot set up eventfd, " + "error %i (%s)\n", errno, strerror(errno)); + return -1; + } + + dev->intr_handle.fd = fd; + dev->intr_handle.vfio_dev_fd = vfio_dev_fd; + + switch (i) { + case VFIO_PCI_MSIX_IRQ_INDEX: + internal_config.vfio_intr_mode = RTE_INTR_MODE_MSIX; + dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSIX; + break; + case VFIO_PCI_MSI_IRQ_INDEX: + internal_config.vfio_intr_mode = RTE_INTR_MODE_MSI; + dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_MSI; + break; + case VFIO_PCI_INTX_IRQ_INDEX: + internal_config.vfio_intr_mode = RTE_INTR_MODE_LEGACY; + dev->intr_handle.type = RTE_INTR_HANDLE_VFIO_LEGACY; + break; + default: + RTE_LOG(ERR, EAL, " unknown interrupt type!\n"); + return -1; + } + + return 0; + } + + /* if we're here, we haven't found a suitable interrupt vector */ + return -1; +} + +/* open container fd or get an existing one */ +int +pci_vfio_get_container_fd(void) +{ + int ret, vfio_container_fd; + + /* if we're in a primary process, try to open the container */ + if (internal_config.process_type == RTE_PROC_PRIMARY) { + vfio_container_fd = open(VFIO_CONTAINER_PATH, O_RDWR); + if (vfio_container_fd < 0) { + RTE_LOG(ERR, EAL, " cannot open VFIO container, " + "error %i (%s)\n", errno, strerror(errno)); + return -1; + } + + /* check VFIO API version */ + ret = ioctl(vfio_container_fd, VFIO_GET_API_VERSION); + if (ret != VFIO_API_VERSION) { + if (ret < 0) + RTE_LOG(ERR, EAL, " could not get VFIO API version, " + "error %i (%s)\n", errno, strerror(errno)); + else + RTE_LOG(ERR, EAL, " unsupported VFIO API version!\n"); + close(vfio_container_fd); + return -1; + } + + ret = pci_vfio_has_supported_extensions(vfio_container_fd); + if (ret) { + RTE_LOG(ERR, EAL, " no supported IOMMU " + "extensions found!\n"); + return -1; + } + + return vfio_container_fd; + } else { + /* + * if we're in a secondary process, request container fd from the + * primary process via our socket + */ + int socket_fd; + + socket_fd = vfio_mp_sync_connect_to_primary(); + if (socket_fd < 0) { + RTE_LOG(ERR, EAL, " cannot connect to primary process!\n"); + return -1; + } + if (vfio_mp_sync_send_request(socket_fd, SOCKET_REQ_CONTAINER) < 0) { + RTE_LOG(ERR, EAL, " cannot request container fd!\n"); + close(socket_fd); + return -1; + } + vfio_container_fd = vfio_mp_sync_receive_fd(socket_fd); + if (vfio_container_fd < 0) { + RTE_LOG(ERR, EAL, " cannot get container fd!\n"); + close(socket_fd); + return -1; + } + close(socket_fd); + return vfio_container_fd; + } + + return -1; +} + +/* open group fd or get an existing one */ +int +pci_vfio_get_group_fd(int iommu_group_no) +{ + int i; + int vfio_group_fd; + char filename[PATH_MAX]; + + /* check if we already have the group descriptor open */ + for (i = 0; i < vfio_cfg.vfio_group_idx; i++) + if (vfio_cfg.vfio_groups[i].group_no == iommu_group_no) + return vfio_cfg.vfio_groups[i].fd; + + /* if primary, try to open the group */ + if (internal_config.process_type == RTE_PROC_PRIMARY) { + /* try regular group format */ + snprintf(filename, sizeof(filename), + VFIO_GROUP_FMT, iommu_group_no); + vfio_group_fd = open(filename, O_RDWR); + if (vfio_group_fd < 0) { + /* if file not found, it's not an error */ + if (errno != ENOENT) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", filename, + strerror(errno)); + return -1; + } + + /* special case: try no-IOMMU path as well */ + snprintf(filename, sizeof(filename), + VFIO_NOIOMMU_GROUP_FMT, iommu_group_no); + vfio_group_fd = open(filename, O_RDWR); + if (vfio_group_fd < 0) { + if (errno != ENOENT) { + RTE_LOG(ERR, EAL, "Cannot open %s: %s\n", filename, + strerror(errno)); + return -1; + } + return 0; + } + /* noiommu group found */ + } + + /* if the fd is valid, create a new group for it */ + if (vfio_cfg.vfio_group_idx == VFIO_MAX_GROUPS) { + RTE_LOG(ERR, EAL, "Maximum number of VFIO groups reached!\n"); + close(vfio_group_fd); + return -1; + } + vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].group_no = iommu_group_no; + vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].fd = vfio_group_fd; + return vfio_group_fd; + } + /* if we're in a secondary process, request group fd from the primary + * process via our socket + */ + else { + int socket_fd, ret; + + socket_fd = vfio_mp_sync_connect_to_primary(); + + if (socket_fd < 0) { + RTE_LOG(ERR, EAL, " cannot connect to primary process!\n"); + return -1; + } + if (vfio_mp_sync_send_request(socket_fd, SOCKET_REQ_GROUP) < 0) { + RTE_LOG(ERR, EAL, " cannot request container fd!\n"); + close(socket_fd); + return -1; + } + if (vfio_mp_sync_send_request(socket_fd, iommu_group_no) < 0) { + RTE_LOG(ERR, EAL, " cannot send group number!\n"); + close(socket_fd); + return -1; + } + ret = vfio_mp_sync_receive_request(socket_fd); + switch (ret) { + case SOCKET_NO_FD: + close(socket_fd); + return 0; + case SOCKET_OK: + vfio_group_fd = vfio_mp_sync_receive_fd(socket_fd); + /* if we got the fd, return it */ + if (vfio_group_fd > 0) { + close(socket_fd); + return vfio_group_fd; + } + /* fall-through on error */ + default: + RTE_LOG(ERR, EAL, " cannot get container fd!\n"); + close(socket_fd); + return -1; + } + } + return -1; +} + +/* parse IOMMU group number for a PCI device + * returns 1 on success, -1 for errors, 0 for non-existent group + */ +static int +pci_vfio_get_group_no(const char *pci_addr, int *iommu_group_no) +{ + char linkname[PATH_MAX]; + char filename[PATH_MAX]; + char *tok[16], *group_tok, *end; + int ret; + + memset(linkname, 0, sizeof(linkname)); + memset(filename, 0, sizeof(filename)); + + /* try to find out IOMMU group for this device */ + snprintf(linkname, sizeof(linkname), + SYSFS_PCI_DEVICES "/%s/iommu_group", pci_addr); + + ret = readlink(linkname, filename, sizeof(filename)); + + /* if the link doesn't exist, no VFIO for us */ + if (ret < 0) + return 0; + + ret = rte_strsplit(filename, sizeof(filename), + tok, RTE_DIM(tok), '/'); + + if (ret <= 0) { + RTE_LOG(ERR, EAL, " %s cannot get IOMMU group\n", pci_addr); + return -1; + } + + /* IOMMU group is always the last token */ + errno = 0; + group_tok = tok[ret - 1]; + end = group_tok; + *iommu_group_no = strtol(group_tok, &end, 10); + if ((end != group_tok && *end != '\0') || errno != 0) { + RTE_LOG(ERR, EAL, " %s error parsing IOMMU number!\n", pci_addr); + return -1; + } + + return 1; +} + +static void +clear_current_group(void) +{ + vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].group_no = 0; + vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].fd = -1; +} + + +/* + * map the PCI resources of a PCI device in virtual memory (VFIO version). + * primary and secondary processes follow almost exactly the same path + */ +int +pci_vfio_map_resource(struct rte_pci_device *dev) +{ + struct vfio_group_status group_status = { + .argsz = sizeof(group_status) + }; + struct vfio_device_info device_info = { .argsz = sizeof(device_info) }; + int vfio_group_fd, vfio_dev_fd; + int iommu_group_no; + char pci_addr[PATH_MAX] = {0}; + struct rte_pci_addr *loc = &dev->addr; + int i, ret, msix_bar; + struct mapped_pci_resource *vfio_res = NULL; + struct mapped_pci_res_list *vfio_res_list = RTE_TAILQ_CAST(rte_vfio_tailq.head, mapped_pci_res_list); + + struct pci_map *maps; + uint32_t msix_table_offset = 0; + uint32_t msix_table_size = 0; + uint32_t ioport_bar; + + dev->intr_handle.fd = -1; + dev->intr_handle.type = RTE_INTR_HANDLE_UNKNOWN; + + /* store PCI address string */ + snprintf(pci_addr, sizeof(pci_addr), PCI_PRI_FMT, + loc->domain, loc->bus, loc->devid, loc->function); + + /* get group number */ + ret = pci_vfio_get_group_no(pci_addr, &iommu_group_no); + if (ret == 0) { + RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n", + pci_addr); + return 1; + } + + /* if negative, something failed */ + if (ret < 0) + return -1; + + /* get the actual group fd */ + vfio_group_fd = pci_vfio_get_group_fd(iommu_group_no); + if (vfio_group_fd < 0) + return -1; + + /* store group fd */ + vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].group_no = iommu_group_no; + vfio_cfg.vfio_groups[vfio_cfg.vfio_group_idx].fd = vfio_group_fd; + + /* if group_fd == 0, that means the device isn't managed by VFIO */ + if (vfio_group_fd == 0) { + RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n", + pci_addr); + /* we store 0 as group fd to distinguish between existing but + * unbound VFIO groups, and groups that don't exist at all. + */ + vfio_cfg.vfio_group_idx++; + return 1; + } + + /* + * at this point, we know at least one port on this device is bound to VFIO, + * so we can proceed to try and set this particular port up + */ + + /* check if the group is viable */ + ret = ioctl(vfio_group_fd, VFIO_GROUP_GET_STATUS, &group_status); + if (ret) { + RTE_LOG(ERR, EAL, " %s cannot get group status, " + "error %i (%s)\n", pci_addr, errno, strerror(errno)); + close(vfio_group_fd); + clear_current_group(); + return -1; + } else if (!(group_status.flags & VFIO_GROUP_FLAGS_VIABLE)) { + RTE_LOG(ERR, EAL, " %s VFIO group is not viable!\n", pci_addr); + close(vfio_group_fd); + clear_current_group(); + return -1; + } + + /* + * at this point, we know that this group is viable (meaning, all devices + * are either bound to VFIO or not bound to anything) + */ + + /* check if group does not have a container yet */ + if (!(group_status.flags & VFIO_GROUP_FLAGS_CONTAINER_SET)) { + + /* add group to a container */ + ret = ioctl(vfio_group_fd, VFIO_GROUP_SET_CONTAINER, + &vfio_cfg.vfio_container_fd); + if (ret) { + RTE_LOG(ERR, EAL, " %s cannot add VFIO group to container, " + "error %i (%s)\n", pci_addr, errno, strerror(errno)); + close(vfio_group_fd); + clear_current_group(); + return -1; + } + /* + * at this point we know that this group has been successfully + * initialized, so we increment vfio_group_idx to indicate that we can + * add new groups. + */ + vfio_cfg.vfio_group_idx++; + } + + /* + * pick an IOMMU type and set up DMA mappings for container + * + * needs to be done only once, only when at least one group is assigned to + * a container and only in primary process + */ + if (internal_config.process_type == RTE_PROC_PRIMARY && + vfio_cfg.vfio_container_has_dma == 0) { + /* select an IOMMU type which we will be using */ + const struct vfio_iommu_type *t = + pci_vfio_set_iommu_type(vfio_cfg.vfio_container_fd); + if (!t) { + RTE_LOG(ERR, EAL, " %s failed to select IOMMU type\n", pci_addr); + return -1; + } + ret = t->dma_map_func(vfio_cfg.vfio_container_fd); + if (ret) { + RTE_LOG(ERR, EAL, " %s DMA remapping failed, " + "error %i (%s)\n", pci_addr, errno, strerror(errno)); + return -1; + } + vfio_cfg.vfio_container_has_dma = 1; + } + + /* get a file descriptor for the device */ + vfio_dev_fd = ioctl(vfio_group_fd, VFIO_GROUP_GET_DEVICE_FD, pci_addr); + if (vfio_dev_fd < 0) { + /* if we cannot get a device fd, this simply means that this + * particular port is not bound to VFIO + */ + RTE_LOG(WARNING, EAL, " %s not managed by VFIO driver, skipping\n", + pci_addr); + return 1; + } + + /* test and setup the device */ + ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_INFO, &device_info); + if (ret) { + RTE_LOG(ERR, EAL, " %s cannot get device info, " + "error %i (%s)\n", pci_addr, errno, strerror(errno)); + close(vfio_dev_fd); + return -1; + } + + /* get MSI-X BAR, if any (we have to know where it is because we can't + * easily mmap it when using VFIO) */ + msix_bar = -1; + ret = pci_vfio_get_msix_bar(vfio_dev_fd, &msix_bar, + &msix_table_offset, &msix_table_size); + if (ret < 0) { + RTE_LOG(ERR, EAL, " %s cannot get MSI-X BAR number!\n", pci_addr); + close(vfio_dev_fd); + return -1; + } + + /* if we're in a primary process, allocate vfio_res and get region info */ + if (internal_config.process_type == RTE_PROC_PRIMARY) { + vfio_res = rte_zmalloc("VFIO_RES", sizeof(*vfio_res), 0); + if (vfio_res == NULL) { + RTE_LOG(ERR, EAL, + "%s(): cannot store uio mmap details\n", __func__); + close(vfio_dev_fd); + return -1; + } + memcpy(&vfio_res->pci_addr, &dev->addr, sizeof(vfio_res->pci_addr)); + + /* get number of registers (up to BAR5) */ + vfio_res->nb_maps = RTE_MIN((int) device_info.num_regions, + VFIO_PCI_BAR5_REGION_INDEX + 1); + } else { + /* if we're in a secondary process, just find our tailq entry */ + TAILQ_FOREACH(vfio_res, vfio_res_list, next) { + if (memcmp(&vfio_res->pci_addr, &dev->addr, sizeof(dev->addr))) + continue; + break; + } + /* if we haven't found our tailq entry, something's wrong */ + if (vfio_res == NULL) { + RTE_LOG(ERR, EAL, " %s cannot find TAILQ entry for PCI device!\n", + pci_addr); + close(vfio_dev_fd); + return -1; + } + } + + /* map BARs */ + maps = vfio_res->maps; + + for (i = 0; i < (int) vfio_res->nb_maps; i++) { + struct vfio_region_info reg = { .argsz = sizeof(reg) }; + void *bar_addr; + struct memreg { + unsigned long offset, size; + } memreg[2] = {}; + + reg.index = i; + + ret = ioctl(vfio_dev_fd, VFIO_DEVICE_GET_REGION_INFO, ®); + + if (ret) { + RTE_LOG(ERR, EAL, " %s cannot get device region info " + "error %i (%s)\n", pci_addr, errno, strerror(errno)); + close(vfio_dev_fd); + if (internal_config.process_type == RTE_PROC_PRIMARY) + rte_free(vfio_res); + return -1; + } + + /* chk for io port region */ + ret = pread64(vfio_dev_fd, &ioport_bar, sizeof(ioport_bar), + VFIO_GET_REGION_ADDR(VFIO_PCI_CONFIG_REGION_INDEX) + + PCI_BASE_ADDRESS_0 + i*4); + + if (ret != sizeof(ioport_bar)) { + RTE_LOG(ERR, EAL, + "Cannot read command (%x) from config space!\n", + PCI_BASE_ADDRESS_0 + i*4); + return -1; + } + + if (ioport_bar & PCI_BASE_ADDRESS_SPACE_IO) { + RTE_LOG(INFO, EAL, + "Ignore mapping IO port bar(%d) addr: %x\n", + i, ioport_bar); + continue; + } + + /* skip non-mmapable BARs */ + if ((reg.flags & VFIO_REGION_INFO_FLAG_MMAP) == 0) + continue; + + if (i == msix_bar) { + /* + * VFIO will not let us map the MSI-X table, + * but we can map around it. + */ + uint32_t table_start = msix_table_offset; + uint32_t table_end = table_start + msix_table_size; + table_end = (table_end + ~PAGE_MASK) & PAGE_MASK; + table_start &= PAGE_MASK; + + if (table_start == 0 && table_end >= reg.size) { + /* Cannot map this BAR */ + RTE_LOG(DEBUG, EAL, "Skipping BAR %d\n", i); + continue; + } else { + memreg[0].offset = reg.offset; + memreg[0].size = table_start; + memreg[1].offset = table_end; + memreg[1].size = reg.size - table_end; + + RTE_LOG(DEBUG, EAL, + "Trying to map BAR %d that contains the MSI-X " + "table. Trying offsets: " + "0x%04lx:0x%04lx, 0x%04lx:0x%04lx\n", i, + memreg[0].offset, memreg[0].size, + memreg[1].offset, memreg[1].size); + } + } else { + memreg[0].offset = reg.offset; + memreg[0].size = reg.size; + } + + /* try to figure out an address */ + if (internal_config.process_type == RTE_PROC_PRIMARY) { + /* try mapping somewhere close to the end of hugepages */ + if (pci_map_addr == NULL) + pci_map_addr = pci_find_max_end_va(); + + bar_addr = pci_map_addr; + pci_map_addr = RTE_PTR_ADD(bar_addr, (size_t) reg.size); + } else { + bar_addr = maps[i].addr; + } + + /* reserve the address using an inaccessible mapping */ + bar_addr = mmap(bar_addr, reg.size, 0, MAP_PRIVATE | + MAP_ANONYMOUS, -1, 0); + if (bar_addr != MAP_FAILED) { + void *map_addr = NULL; + if (memreg[0].size) { + /* actual map of first part */ + map_addr = pci_map_resource(bar_addr, vfio_dev_fd, + memreg[0].offset, + memreg[0].size, + MAP_FIXED); + } + + /* if there's a second part, try to map it */ + if (map_addr != MAP_FAILED + && memreg[1].offset && memreg[1].size) { + void *second_addr = RTE_PTR_ADD(bar_addr, memreg[1].offset); + map_addr = pci_map_resource(second_addr, + vfio_dev_fd, memreg[1].offset, + memreg[1].size, + MAP_FIXED); + } + + if (map_addr == MAP_FAILED || !map_addr) { + munmap(bar_addr, reg.size); + bar_addr = MAP_FAILED; + } + } + + if (bar_addr == MAP_FAILED || + (internal_config.process_type == RTE_PROC_SECONDARY && + bar_addr != maps[i].addr)) { + RTE_LOG(ERR, EAL, " %s mapping BAR%i failed: %s\n", pci_addr, i, + strerror(errno)); + close(vfio_dev_fd); + if (internal_config.process_type == RTE_PROC_PRIMARY) + rte_free(vfio_res); + return -1; + } + + maps[i].addr = bar_addr; + maps[i].offset = reg.offset; + maps[i].size = reg.size; + maps[i].path = NULL; /* vfio doesn't have per-resource paths */ + dev->mem_resource[i].addr = bar_addr; + } + + /* if secondary process, do not set up interrupts */ + if (internal_config.process_type == RTE_PROC_PRIMARY) { + if (pci_vfio_setup_interrupts(dev, vfio_dev_fd) != 0) { + RTE_LOG(ERR, EAL, " %s error setting up interrupts!\n", pci_addr); + close(vfio_dev_fd); + rte_free(vfio_res); + return -1; + } + + /* set bus mastering for the device */ + if (pci_vfio_set_bus_master(vfio_dev_fd)) { + RTE_LOG(ERR, EAL, " %s cannot set up bus mastering!\n", pci_addr); + close(vfio_dev_fd); + rte_free(vfio_res); + return -1; + } + + /* Reset the device */ + ioctl(vfio_dev_fd, VFIO_DEVICE_RESET); + } + + if (internal_config.process_type == RTE_PROC_PRIMARY) + TAILQ_INSERT_TAIL(vfio_res_list, vfio_res, next); + + return 0; +} + +int +pci_vfio_ioport_map(struct rte_pci_device *dev, int bar, + struct rte_pci_ioport *p) +{ + if (bar < VFIO_PCI_BAR0_REGION_INDEX || + bar > VFIO_PCI_BAR5_REGION_INDEX) { + RTE_LOG(ERR, EAL, "invalid bar (%d)!\n", bar); + return -1; + } + + p->dev = dev; + p->base = VFIO_GET_REGION_ADDR(bar); + return 0; +} + +void +pci_vfio_ioport_read(struct rte_pci_ioport *p, + void *data, size_t len, off_t offset) +{ + const struct rte_intr_handle *intr_handle = &p->dev->intr_handle; + + if (pread64(intr_handle->vfio_dev_fd, data, + len, p->base + offset) <= 0) + RTE_LOG(ERR, EAL, + "Can't read from PCI bar (%" PRIu64 ") : offset (%x)\n", + VFIO_GET_REGION_IDX(p->base), (int)offset); +} + +void +pci_vfio_ioport_write(struct rte_pci_ioport *p, + const void *data, size_t len, off_t offset) +{ + const struct rte_intr_handle *intr_handle = &p->dev->intr_handle; + + if (pwrite64(intr_handle->vfio_dev_fd, data, + len, p->base + offset) <= 0) + RTE_LOG(ERR, EAL, + "Can't write to PCI bar (%" PRIu64 ") : offset (%x)\n", + VFIO_GET_REGION_IDX(p->base), (int)offset); +} + +int +pci_vfio_ioport_unmap(struct rte_pci_ioport *p) +{ + RTE_SET_USED(p); + return -1; +} + +int +pci_vfio_enable(void) +{ + /* initialize group list */ + int i; + int vfio_available; + + for (i = 0; i < VFIO_MAX_GROUPS; i++) { + vfio_cfg.vfio_groups[i].fd = -1; + vfio_cfg.vfio_groups[i].group_no = -1; + } + + /* inform the user that we are probing for VFIO */ + RTE_LOG(INFO, EAL, "Probing VFIO support...\n"); + + /* check if vfio-pci module is loaded */ + vfio_available = rte_eal_check_module("vfio_pci"); + + /* return error directly */ + if (vfio_available == -1) { + RTE_LOG(INFO, EAL, "Could not get loaded module details!\n"); + return -1; + } + + /* return 0 if VFIO modules not loaded */ + if (vfio_available == 0) { + RTE_LOG(DEBUG, EAL, "VFIO modules not loaded, " + "skipping VFIO support...\n"); + return 0; + } + + vfio_cfg.vfio_container_fd = pci_vfio_get_container_fd(); + + /* check if we have VFIO driver enabled */ + if (vfio_cfg.vfio_container_fd != -1) { + RTE_LOG(NOTICE, EAL, "VFIO support initialized\n"); + vfio_cfg.vfio_enabled = 1; + } else { + RTE_LOG(NOTICE, EAL, "VFIO support could not be initialized\n"); + } + + return 0; +} + +int +pci_vfio_is_enabled(void) +{ + return vfio_cfg.vfio_enabled; +} +#endif diff --git a/lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c b/lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c new file mode 100644 index 00000000..d9188fde --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/eal_pci_vfio_mp_sync.c @@ -0,0 +1,405 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include + +/* sys/un.h with __USE_MISC uses strlen, which is unsafe */ +#ifdef __USE_MISC +#define REMOVED_USE_MISC +#undef __USE_MISC +#endif +#include +/* make sure we redefine __USE_MISC only if it was previously undefined */ +#ifdef REMOVED_USE_MISC +#define __USE_MISC +#undef REMOVED_USE_MISC +#endif + +#include +#include +#include +#include + +#include "eal_filesystem.h" +#include "eal_pci_init.h" +#include "eal_thread.h" + +/** + * @file + * VFIO socket for communication between primary and secondary processes. + * + * This file is only compiled if CONFIG_RTE_EAL_VFIO is set to "y". + */ + +#ifdef VFIO_PRESENT + +#define SOCKET_PATH_FMT "%s/.%s_mp_socket" +#define CMSGLEN (CMSG_LEN(sizeof(int))) +#define FD_TO_CMSGHDR(fd, chdr) \ + do {\ + (chdr).cmsg_len = CMSGLEN;\ + (chdr).cmsg_level = SOL_SOCKET;\ + (chdr).cmsg_type = SCM_RIGHTS;\ + memcpy((chdr).__cmsg_data, &(fd), sizeof(fd));\ + } while (0) +#define CMSGHDR_TO_FD(chdr, fd) \ + memcpy(&(fd), (chdr).__cmsg_data, sizeof(fd)) + +static pthread_t socket_thread; +static int mp_socket_fd; + + +/* get socket path (/var/run if root, $HOME otherwise) */ +static void +get_socket_path(char *buffer, int bufsz) +{ + const char *dir = "/var/run"; + const char *home_dir = getenv("HOME"); + + if (getuid() != 0 && home_dir != NULL) + dir = home_dir; + + /* use current prefix as file path */ + snprintf(buffer, bufsz, SOCKET_PATH_FMT, dir, + internal_config.hugefile_prefix); +} + + + +/* + * data flow for socket comm protocol: + * 1. client sends SOCKET_REQ_CONTAINER or SOCKET_REQ_GROUP + * 1a. in case of SOCKET_REQ_GROUP, client also then sends group number + * 2. server receives message + * 2a. in case of invalid group, SOCKET_ERR is sent back to client + * 2b. in case of unbound group, SOCKET_NO_FD is sent back to client + * 2c. in case of valid group, SOCKET_OK is sent and immediately followed by fd + * + * in case of any error, socket is closed. + */ + +/* send a request, return -1 on error */ +int +vfio_mp_sync_send_request(int socket, int req) +{ + struct msghdr hdr; + struct iovec iov; + int buf; + int ret; + + memset(&hdr, 0, sizeof(hdr)); + + buf = req; + + hdr.msg_iov = &iov; + hdr.msg_iovlen = 1; + iov.iov_base = (char *) &buf; + iov.iov_len = sizeof(buf); + + ret = sendmsg(socket, &hdr, 0); + if (ret < 0) + return -1; + return 0; +} + +/* receive a request and return it */ +int +vfio_mp_sync_receive_request(int socket) +{ + int buf; + struct msghdr hdr; + struct iovec iov; + int ret, req; + + memset(&hdr, 0, sizeof(hdr)); + + buf = SOCKET_ERR; + + hdr.msg_iov = &iov; + hdr.msg_iovlen = 1; + iov.iov_base = (char *) &buf; + iov.iov_len = sizeof(buf); + + ret = recvmsg(socket, &hdr, 0); + if (ret < 0) + return -1; + + req = buf; + + return req; +} + +/* send OK in message, fd in control message */ +int +vfio_mp_sync_send_fd(int socket, int fd) +{ + int buf; + struct msghdr hdr; + struct cmsghdr *chdr; + char chdr_buf[CMSGLEN]; + struct iovec iov; + int ret; + + chdr = (struct cmsghdr *) chdr_buf; + memset(chdr, 0, sizeof(chdr_buf)); + memset(&hdr, 0, sizeof(hdr)); + + hdr.msg_iov = &iov; + hdr.msg_iovlen = 1; + iov.iov_base = (char *) &buf; + iov.iov_len = sizeof(buf); + hdr.msg_control = chdr; + hdr.msg_controllen = CMSGLEN; + + buf = SOCKET_OK; + FD_TO_CMSGHDR(fd, *chdr); + + ret = sendmsg(socket, &hdr, 0); + if (ret < 0) + return -1; + return 0; +} + +/* receive OK in message, fd in control message */ +int +vfio_mp_sync_receive_fd(int socket) +{ + int buf; + struct msghdr hdr; + struct cmsghdr *chdr; + char chdr_buf[CMSGLEN]; + struct iovec iov; + int ret, req, fd; + + buf = SOCKET_ERR; + + chdr = (struct cmsghdr *) chdr_buf; + memset(chdr, 0, sizeof(chdr_buf)); + memset(&hdr, 0, sizeof(hdr)); + + hdr.msg_iov = &iov; + hdr.msg_iovlen = 1; + iov.iov_base = (char *) &buf; + iov.iov_len = sizeof(buf); + hdr.msg_control = chdr; + hdr.msg_controllen = CMSGLEN; + + ret = recvmsg(socket, &hdr, 0); + if (ret < 0) + return -1; + + req = buf; + + if (req != SOCKET_OK) + return -1; + + CMSGHDR_TO_FD(*chdr, fd); + + return fd; +} + +/* connect socket_fd in secondary process to the primary process's socket */ +int +vfio_mp_sync_connect_to_primary(void) +{ + struct sockaddr_un addr; + socklen_t sockaddr_len; + int socket_fd; + + /* set up a socket */ + socket_fd = socket(AF_UNIX, SOCK_SEQPACKET, 0); + if (socket_fd < 0) { + RTE_LOG(ERR, EAL, "Failed to create socket!\n"); + return -1; + } + + get_socket_path(addr.sun_path, sizeof(addr.sun_path)); + addr.sun_family = AF_UNIX; + + sockaddr_len = sizeof(struct sockaddr_un); + + if (connect(socket_fd, (struct sockaddr *) &addr, sockaddr_len) == 0) + return socket_fd; + + /* if connect failed */ + close(socket_fd); + return -1; +} + + + +/* + * socket listening thread for primary process + */ +static __attribute__((noreturn)) void * +pci_vfio_mp_sync_thread(void __rte_unused * arg) +{ + int ret, fd, vfio_group_no; + + /* wait for requests on the socket */ + for (;;) { + int conn_sock; + struct sockaddr_un addr; + socklen_t sockaddr_len = sizeof(addr); + + /* this is a blocking call */ + conn_sock = accept(mp_socket_fd, (struct sockaddr *) &addr, + &sockaddr_len); + + /* just restart on error */ + if (conn_sock == -1) + continue; + + /* set socket to linger after close */ + struct linger l; + l.l_onoff = 1; + l.l_linger = 60; + setsockopt(conn_sock, SOL_SOCKET, SO_LINGER, &l, sizeof(l)); + + ret = vfio_mp_sync_receive_request(conn_sock); + + switch (ret) { + case SOCKET_REQ_CONTAINER: + fd = pci_vfio_get_container_fd(); + if (fd < 0) + vfio_mp_sync_send_request(conn_sock, SOCKET_ERR); + else + vfio_mp_sync_send_fd(conn_sock, fd); + break; + case SOCKET_REQ_GROUP: + /* wait for group number */ + vfio_group_no = vfio_mp_sync_receive_request(conn_sock); + if (vfio_group_no < 0) { + close(conn_sock); + continue; + } + + fd = pci_vfio_get_group_fd(vfio_group_no); + + if (fd < 0) + vfio_mp_sync_send_request(conn_sock, SOCKET_ERR); + /* if VFIO group exists but isn't bound to VFIO driver */ + else if (fd == 0) + vfio_mp_sync_send_request(conn_sock, SOCKET_NO_FD); + /* if group exists and is bound to VFIO driver */ + else { + vfio_mp_sync_send_request(conn_sock, SOCKET_OK); + vfio_mp_sync_send_fd(conn_sock, fd); + } + break; + default: + vfio_mp_sync_send_request(conn_sock, SOCKET_ERR); + break; + } + close(conn_sock); + } +} + +static int +vfio_mp_sync_socket_setup(void) +{ + int ret, socket_fd; + struct sockaddr_un addr; + socklen_t sockaddr_len; + + /* set up a socket */ + socket_fd = socket(AF_UNIX, SOCK_SEQPACKET, 0); + if (socket_fd < 0) { + RTE_LOG(ERR, EAL, "Failed to create socket!\n"); + return -1; + } + + get_socket_path(addr.sun_path, sizeof(addr.sun_path)); + addr.sun_family = AF_UNIX; + + sockaddr_len = sizeof(struct sockaddr_un); + + unlink(addr.sun_path); + + ret = bind(socket_fd, (struct sockaddr *) &addr, sockaddr_len); + if (ret) { + RTE_LOG(ERR, EAL, "Failed to bind socket: %s!\n", strerror(errno)); + close(socket_fd); + return -1; + } + + ret = listen(socket_fd, 50); + if (ret) { + RTE_LOG(ERR, EAL, "Failed to listen: %s!\n", strerror(errno)); + close(socket_fd); + return -1; + } + + /* save the socket in local configuration */ + mp_socket_fd = socket_fd; + + return 0; +} + +/* + * set up a local socket and tell it to listen for incoming connections + */ +int +pci_vfio_mp_sync_setup(void) +{ + int ret; + char thread_name[RTE_MAX_THREAD_NAME_LEN]; + + if (vfio_mp_sync_socket_setup() < 0) { + RTE_LOG(ERR, EAL, "Failed to set up local socket!\n"); + return -1; + } + + ret = pthread_create(&socket_thread, NULL, + pci_vfio_mp_sync_thread, NULL); + if (ret) { + RTE_LOG(ERR, EAL, + "Failed to create thread for communication with secondary processes!\n"); + close(mp_socket_fd); + return -1; + } + + /* Set thread_name for aid in debugging. */ + snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "pci-vfio-sync"); + ret = rte_thread_setname(socket_thread, thread_name); + if (ret) + RTE_LOG(ERR, EAL, + "Failed to set thread name for secondary processes!\n"); + + return 0; +} + +#endif diff --git a/lib/librte_eal/linuxapp/eal/eal_thread.c b/lib/librte_eal/linuxapp/eal/eal_thread.c new file mode 100644 index 00000000..18bd8e04 --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/eal_thread.c @@ -0,0 +1,199 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "eal_private.h" +#include "eal_thread.h" + +RTE_DEFINE_PER_LCORE(unsigned, _lcore_id) = LCORE_ID_ANY; +RTE_DEFINE_PER_LCORE(unsigned, _socket_id) = (unsigned)SOCKET_ID_ANY; +RTE_DEFINE_PER_LCORE(rte_cpuset_t, _cpuset); + +/* + * Send a message to a slave lcore identified by slave_id to call a + * function f with argument arg. Once the execution is done, the + * remote lcore switch in FINISHED state. + */ +int +rte_eal_remote_launch(int (*f)(void *), void *arg, unsigned slave_id) +{ + int n; + char c = 0; + int m2s = lcore_config[slave_id].pipe_master2slave[1]; + int s2m = lcore_config[slave_id].pipe_slave2master[0]; + + if (lcore_config[slave_id].state != WAIT) + return -EBUSY; + + lcore_config[slave_id].f = f; + lcore_config[slave_id].arg = arg; + + /* send message */ + n = 0; + while (n == 0 || (n < 0 && errno == EINTR)) + n = write(m2s, &c, 1); + if (n < 0) + rte_panic("cannot write on configuration pipe\n"); + + /* wait ack */ + do { + n = read(s2m, &c, 1); + } while (n < 0 && errno == EINTR); + + if (n <= 0) + rte_panic("cannot read on configuration pipe\n"); + + return 0; +} + +/* set affinity for current EAL thread */ +static int +eal_thread_set_affinity(void) +{ + unsigned lcore_id = rte_lcore_id(); + + /* acquire system unique id */ + rte_gettid(); + + /* update EAL thread core affinity */ + return rte_thread_set_affinity(&lcore_config[lcore_id].cpuset); +} + +void eal_thread_init_master(unsigned lcore_id) +{ + /* set the lcore ID in per-lcore memory area */ + RTE_PER_LCORE(_lcore_id) = lcore_id; + + /* set CPU affinity */ + if (eal_thread_set_affinity() < 0) + rte_panic("cannot set affinity\n"); +} + +/* main loop of threads */ +__attribute__((noreturn)) void * +eal_thread_loop(__attribute__((unused)) void *arg) +{ + char c; + int n, ret; + unsigned lcore_id; + pthread_t thread_id; + int m2s, s2m; + char cpuset[RTE_CPU_AFFINITY_STR_LEN]; + + thread_id = pthread_self(); + + /* retrieve our lcore_id from the configuration structure */ + RTE_LCORE_FOREACH_SLAVE(lcore_id) { + if (thread_id == lcore_config[lcore_id].thread_id) + break; + } + if (lcore_id == RTE_MAX_LCORE) + rte_panic("cannot retrieve lcore id\n"); + + m2s = lcore_config[lcore_id].pipe_master2slave[0]; + s2m = lcore_config[lcore_id].pipe_slave2master[1]; + + /* set the lcore ID in per-lcore memory area */ + RTE_PER_LCORE(_lcore_id) = lcore_id; + + /* set CPU affinity */ + if (eal_thread_set_affinity() < 0) + rte_panic("cannot set affinity\n"); + + ret = eal_thread_dump_affinity(cpuset, RTE_CPU_AFFINITY_STR_LEN); + + RTE_LOG(DEBUG, EAL, "lcore %u is ready (tid=%x;cpuset=[%s%s])\n", + lcore_id, (int)thread_id, cpuset, ret == 0 ? "" : "..."); + + /* read on our pipe to get commands */ + while (1) { + void *fct_arg; + + /* wait command */ + do { + n = read(m2s, &c, 1); + } while (n < 0 && errno == EINTR); + + if (n <= 0) + rte_panic("cannot read on configuration pipe\n"); + + lcore_config[lcore_id].state = RUNNING; + + /* send ack */ + n = 0; + while (n == 0 || (n < 0 && errno == EINTR)) + n = write(s2m, &c, 1); + if (n < 0) + rte_panic("cannot write on configuration pipe\n"); + + if (lcore_config[lcore_id].f == NULL) + rte_panic("NULL function pointer\n"); + + /* call the function and store the return value */ + fct_arg = lcore_config[lcore_id].arg; + ret = lcore_config[lcore_id].f(fct_arg); + lcore_config[lcore_id].ret = ret; + rte_wmb(); + lcore_config[lcore_id].state = FINISHED; + } + + /* never reached */ + /* pthread_exit(NULL); */ + /* return NULL; */ +} + +/* require calling thread tid by gettid() */ +int rte_sys_gettid(void) +{ + return (int)syscall(SYS_gettid); +} diff --git a/lib/librte_eal/linuxapp/eal/eal_timer.c b/lib/librte_eal/linuxapp/eal/eal_timer.c new file mode 100644 index 00000000..f2abb7b6 --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/eal_timer.c @@ -0,0 +1,305 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * Copyright(c) 2012-2013 6WIND S.A. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "eal_private.h" +#include "eal_internal_cfg.h" + +enum timer_source eal_timer_source = EAL_TIMER_HPET; + +#ifdef RTE_LIBEAL_USE_HPET + +#define DEV_HPET "/dev/hpet" + +/* Maximum number of counters. */ +#define HPET_TIMER_NUM 3 + +/* General capabilities register */ +#define CLK_PERIOD_SHIFT 32 /* Clock period shift. */ +#define CLK_PERIOD_MASK 0xffffffff00000000ULL /* Clock period mask. */ + +/** + * HPET timer registers. From the Intel IA-PC HPET (High Precision Event + * Timers) Specification. + */ +struct eal_hpet_regs { + /* Memory-mapped, software visible registers */ + uint64_t capabilities; /**< RO General Capabilities Register. */ + uint64_t reserved0; /**< Reserved for future use. */ + uint64_t config; /**< RW General Configuration Register. */ + uint64_t reserved1; /**< Reserved for future use. */ + uint64_t isr; /**< RW Clear General Interrupt Status. */ + uint64_t reserved2[25]; /**< Reserved for future use. */ + union { + uint64_t counter; /**< RW Main Counter Value Register. */ + struct { + uint32_t counter_l; /**< RW Main Counter Low. */ + uint32_t counter_h; /**< RW Main Counter High. */ + }; + }; + uint64_t reserved3; /**< Reserved for future use. */ + struct { + uint64_t config; /**< RW Timer Config and Capability Reg. */ + uint64_t comp; /**< RW Timer Comparator Value Register. */ + uint64_t fsb; /**< RW FSB Interrupt Route Register. */ + uint64_t reserved4; /**< Reserved for future use. */ + } timers[HPET_TIMER_NUM]; /**< Set of HPET timers. */ +}; + +/* Mmap'd hpet registers */ +static volatile struct eal_hpet_regs *eal_hpet = NULL; + +/* Period at which the HPET counter increments in + * femtoseconds (10^-15 seconds). */ +static uint32_t eal_hpet_resolution_fs = 0; + +/* Frequency of the HPET counter in Hz */ +static uint64_t eal_hpet_resolution_hz = 0; + +/* Incremented 4 times during one 32bits hpet full count */ +static uint32_t eal_hpet_msb; + +static pthread_t msb_inc_thread_id; + +/* + * This function runs on a specific thread to update a global variable + * containing used to process MSB of the HPET (unfortunatelly, we need + * this because hpet is 32 bits by default under linux). + */ +static void +hpet_msb_inc(__attribute__((unused)) void *arg) +{ + uint32_t t; + + while (1) { + t = (eal_hpet->counter_l >> 30); + if (t != (eal_hpet_msb & 3)) + eal_hpet_msb ++; + sleep(10); + } +} + +uint64_t +rte_get_hpet_hz(void) +{ + if(internal_config.no_hpet) + rte_panic("Error, HPET called, but no HPET present\n"); + + return eal_hpet_resolution_hz; +} + +uint64_t +rte_get_hpet_cycles(void) +{ + uint32_t t, msb; + uint64_t ret; + + if(internal_config.no_hpet) + rte_panic("Error, HPET called, but no HPET present\n"); + + t = eal_hpet->counter_l; + msb = eal_hpet_msb; + ret = (msb + 2 - (t >> 30)) / 4; + ret <<= 32; + ret += t; + return ret; +} + +#endif + +#ifdef RTE_LIBEAL_USE_HPET +/* + * Open and mmap /dev/hpet (high precision event timer) that will + * provide our time reference. + */ +int +rte_eal_hpet_init(int make_default) +{ + int fd, ret; + char thread_name[RTE_MAX_THREAD_NAME_LEN]; + + if (internal_config.no_hpet) { + RTE_LOG(NOTICE, EAL, "HPET is disabled\n"); + return -1; + } + + fd = open(DEV_HPET, O_RDONLY); + if (fd < 0) { + RTE_LOG(ERR, EAL, "ERROR: Cannot open "DEV_HPET": %s!\n", + strerror(errno)); + internal_config.no_hpet = 1; + return -1; + } + eal_hpet = mmap(NULL, 1024, PROT_READ, MAP_SHARED, fd, 0); + if (eal_hpet == MAP_FAILED) { + RTE_LOG(ERR, EAL, "ERROR: Cannot mmap "DEV_HPET"!\n" + "Please enable CONFIG_HPET_MMAP in your kernel configuration " + "to allow HPET support.\n" + "To run without using HPET, set CONFIG_RTE_LIBEAL_USE_HPET=n " + "in your build configuration or use '--no-hpet' EAL flag.\n"); + close(fd); + internal_config.no_hpet = 1; + return -1; + } + close(fd); + + eal_hpet_resolution_fs = (uint32_t)((eal_hpet->capabilities & + CLK_PERIOD_MASK) >> + CLK_PERIOD_SHIFT); + + eal_hpet_resolution_hz = (1000ULL*1000ULL*1000ULL*1000ULL*1000ULL) / + (uint64_t)eal_hpet_resolution_fs; + + RTE_LOG(INFO, EAL, "HPET frequency is ~%"PRIu64" kHz\n", + eal_hpet_resolution_hz/1000); + + eal_hpet_msb = (eal_hpet->counter_l >> 30); + + /* create a thread that will increment a global variable for + * msb (hpet is 32 bits by default under linux) */ + ret = pthread_create(&msb_inc_thread_id, NULL, + (void *(*)(void *))hpet_msb_inc, NULL); + if (ret != 0) { + RTE_LOG(ERR, EAL, "ERROR: Cannot create HPET timer thread!\n"); + internal_config.no_hpet = 1; + return -1; + } + + /* + * Set thread_name for aid in debugging. + */ + snprintf(thread_name, RTE_MAX_THREAD_NAME_LEN, "hpet-msb-inc"); + ret = rte_thread_setname(msb_inc_thread_id, thread_name); + if (ret != 0) + RTE_LOG(ERR, EAL, + "ERROR: Cannot set HPET timer thread name!\n"); + + if (make_default) + eal_timer_source = EAL_TIMER_HPET; + return 0; +} +#endif + +static void +check_tsc_flags(void) +{ + char line[512]; + FILE *stream; + + stream = fopen("/proc/cpuinfo", "r"); + if (!stream) { + RTE_LOG(WARNING, EAL, "WARNING: Unable to open /proc/cpuinfo\n"); + return; + } + + while (fgets(line, sizeof line, stream)) { + char *constant_tsc; + char *nonstop_tsc; + + if (strncmp(line, "flags", 5) != 0) + continue; + + constant_tsc = strstr(line, "constant_tsc"); + nonstop_tsc = strstr(line, "nonstop_tsc"); + if (!constant_tsc || !nonstop_tsc) + RTE_LOG(WARNING, EAL, + "WARNING: cpu flags " + "constant_tsc=%s " + "nonstop_tsc=%s " + "-> using unreliable clock cycles !\n", + constant_tsc ? "yes":"no", + nonstop_tsc ? "yes":"no"); + break; + } + + fclose(stream); +} + +uint64_t +get_tsc_freq(void) +{ +#ifdef CLOCK_MONOTONIC_RAW +#define NS_PER_SEC 1E9 + + struct timespec sleeptime = {.tv_nsec = NS_PER_SEC / 10 }; /* 1/10 second */ + + struct timespec t_start, t_end; + uint64_t tsc_hz; + + if (clock_gettime(CLOCK_MONOTONIC_RAW, &t_start) == 0) { + uint64_t ns, end, start = rte_rdtsc(); + nanosleep(&sleeptime,NULL); + clock_gettime(CLOCK_MONOTONIC_RAW, &t_end); + end = rte_rdtsc(); + ns = ((t_end.tv_sec - t_start.tv_sec) * NS_PER_SEC); + ns += (t_end.tv_nsec - t_start.tv_nsec); + + double secs = (double)ns/NS_PER_SEC; + tsc_hz = (uint64_t)((end - start)/secs); + return tsc_hz; + } +#endif + return 0; +} + +int +rte_eal_timer_init(void) +{ + + eal_timer_source = EAL_TIMER_TSC; + + set_tsc_freq(); + check_tsc_flags(); + return 0; +} diff --git a/lib/librte_eal/linuxapp/eal/eal_vfio.h b/lib/librte_eal/linuxapp/eal/eal_vfio.h new file mode 100644 index 00000000..f483bf40 --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/eal_vfio.h @@ -0,0 +1,67 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef EAL_VFIO_H_ +#define EAL_VFIO_H_ + +/* + * determine if VFIO is present on the system + */ +#ifdef RTE_EAL_VFIO +#include +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 6, 0) +#include + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 10, 0) +#define RTE_PCI_MSIX_TABLE_BIR 0x7 +#define RTE_PCI_MSIX_TABLE_OFFSET 0xfffffff8 +#define RTE_PCI_MSIX_FLAGS_QSIZE 0x07ff +#else +#define RTE_PCI_MSIX_TABLE_BIR PCI_MSIX_TABLE_BIR +#define RTE_PCI_MSIX_TABLE_OFFSET PCI_MSIX_TABLE_OFFSET +#define RTE_PCI_MSIX_FLAGS_QSIZE PCI_MSIX_FLAGS_QSIZE +#endif + +#define RTE_VFIO_TYPE1 VFIO_TYPE1_IOMMU + +#if LINUX_VERSION_CODE < KERNEL_VERSION(4, 5, 0) +#define RTE_VFIO_NOIOMMU 8 +#else +#define RTE_VFIO_NOIOMMU VFIO_NOIOMMU_IOMMU +#endif + +#define VFIO_PRESENT +#endif /* kernel version */ +#endif /* RTE_EAL_VFIO */ + +#endif /* EAL_VFIO_H_ */ diff --git a/lib/librte_eal/linuxapp/eal/eal_xen_memory.c b/lib/librte_eal/linuxapp/eal/eal_xen_memory.c new file mode 100644 index 00000000..495eef9e --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/eal_xen_memory.c @@ -0,0 +1,369 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "eal_private.h" +#include "eal_internal_cfg.h" +#include "eal_filesystem.h" +#include + +#define PAGE_SIZE RTE_PGSIZE_4K +#define DEFAUL_DOM0_NAME "dom0-mem" + +static int xen_fd = -1; +static const char sys_dir_path[] = "/sys/kernel/mm/dom0-mm/memsize-mB"; + +/* + * Try to mmap *size bytes in /dev/zero. If it is successful, return the + * pointer to the mmap'd area and keep *size unmodified. Else, retry + * with a smaller zone: decrease *size by mem_size until it reaches + * 0. In this case, return NULL. Note: this function returns an address + * which is a multiple of mem_size size. + */ +static void * +xen_get_virtual_area(size_t *size, size_t mem_size) +{ + void *addr; + int fd; + long aligned_addr; + + RTE_LOG(DEBUG, EAL, "Ask a virtual area of 0x%zu bytes\n", *size); + + fd = open("/dev/zero", O_RDONLY); + if (fd < 0){ + RTE_LOG(ERR, EAL, "Cannot open /dev/zero\n"); + return NULL; + } + do { + addr = mmap(NULL, (*size) + mem_size, PROT_READ, + MAP_PRIVATE, fd, 0); + if (addr == MAP_FAILED) + *size -= mem_size; + } while (addr == MAP_FAILED && *size > 0); + + if (addr == MAP_FAILED) { + close(fd); + RTE_LOG(ERR, EAL, "Cannot get a virtual area\n"); + return NULL; + } + + munmap(addr, (*size) + mem_size); + close(fd); + + /* align addr to a mem_size boundary */ + aligned_addr = (uintptr_t)addr; + aligned_addr = RTE_ALIGN_CEIL(aligned_addr, mem_size); + addr = (void *)(aligned_addr); + + RTE_LOG(DEBUG, EAL, "Virtual area found at %p (size = 0x%zx)\n", + addr, *size); + + return addr; +} + +/** + * Get memory size configuration from /sys/devices/virtual/misc/dom0_mm + * /memsize-mB/memsize file, and the size unit is mB. + */ +static int +get_xen_memory_size(void) +{ + char path[PATH_MAX]; + unsigned long mem_size = 0; + static const char *file_name; + + file_name = "memsize"; + snprintf(path, sizeof(path), "%s/%s", + sys_dir_path, file_name); + + if (eal_parse_sysfs_value(path, &mem_size) < 0) + return -1; + + if (mem_size == 0) + rte_exit(EXIT_FAILURE,"XEN-DOM0:the %s/%s was not" + " configured.\n",sys_dir_path, file_name); + if (mem_size % 2) + rte_exit(EXIT_FAILURE,"XEN-DOM0:the %s/%s must be" + " even number.\n",sys_dir_path, file_name); + + if (mem_size > DOM0_CONFIG_MEMSIZE) + rte_exit(EXIT_FAILURE,"XEN-DOM0:the %s/%s should not be larger" + " than %d mB\n",sys_dir_path, file_name, DOM0_CONFIG_MEMSIZE); + + return mem_size; +} + +/** + * Based on physical address to caculate MFN in Xen Dom0. + */ +phys_addr_t +rte_xen_mem_phy2mch(uint32_t memseg_id, const phys_addr_t phy_addr) +{ + int mfn_id; + uint64_t mfn, mfn_offset; + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + struct rte_memseg *memseg = mcfg->memseg; + + mfn_id = (phy_addr - memseg[memseg_id].phys_addr) / RTE_PGSIZE_2M; + + /*the MFN is contiguous in 2M */ + mfn_offset = (phy_addr - memseg[memseg_id].phys_addr) % + RTE_PGSIZE_2M / PAGE_SIZE; + mfn = mfn_offset + memseg[memseg_id].mfn[mfn_id]; + + /** return mechine address */ + return mfn * PAGE_SIZE + phy_addr % PAGE_SIZE; +} + +int +rte_xen_dom0_memory_init(void) +{ + void *vir_addr, *vma_addr = NULL; + int err, ret = 0; + uint32_t i, requested, mem_size, memseg_idx, num_memseg = 0; + size_t vma_len = 0; + struct memory_info meminfo; + struct memseg_info seginfo[RTE_MAX_MEMSEG]; + int flags, page_size = getpagesize(); + struct rte_mem_config *mcfg = rte_eal_get_configuration()->mem_config; + struct rte_memseg *memseg = mcfg->memseg; + uint64_t total_mem = internal_config.memory; + + memset(seginfo, 0, sizeof(seginfo)); + memset(&meminfo, 0, sizeof(struct memory_info)); + + mem_size = get_xen_memory_size(); + requested = (unsigned) (total_mem / 0x100000); + if (requested > mem_size) + /* if we didn't satisfy total memory requirements */ + rte_exit(EXIT_FAILURE,"Not enough memory available! Requested: %uMB," + " available: %uMB\n", requested, mem_size); + else if (total_mem != 0) + mem_size = requested; + + /* Check FD and open once */ + if (xen_fd < 0) { + xen_fd = open(DOM0_MM_DEV, O_RDWR); + if (xen_fd < 0) { + RTE_LOG(ERR, EAL, "Can not open %s\n",DOM0_MM_DEV); + return -1; + } + } + + meminfo.size = mem_size; + + /* construct memory mangement name for Dom0 */ + snprintf(meminfo.name, DOM0_NAME_MAX, "%s-%s", + internal_config.hugefile_prefix, DEFAUL_DOM0_NAME); + + /* Notify kernel driver to allocate memory */ + ret = ioctl(xen_fd, RTE_DOM0_IOCTL_PREPARE_MEMSEG, &meminfo); + if (ret < 0) { + RTE_LOG(ERR, EAL, "XEN DOM0:failed to get memory\n"); + err = -EIO; + goto fail; + } + + /* Get number of memory segment from driver */ + ret = ioctl(xen_fd, RTE_DOM0_IOCTL_GET_NUM_MEMSEG, &num_memseg); + if (ret < 0) { + RTE_LOG(ERR, EAL, "XEN DOM0:failed to get memseg count.\n"); + err = -EIO; + goto fail; + } + + if(num_memseg > RTE_MAX_MEMSEG){ + RTE_LOG(ERR, EAL, "XEN DOM0: the memseg count %d is greater" + " than max memseg %d.\n",num_memseg, RTE_MAX_MEMSEG); + err = -EIO; + goto fail; + } + + /* get all memory segements information */ + ret = ioctl(xen_fd, RTE_DOM0_IOCTL_GET_MEMSEG_INFO, seginfo); + if (ret < 0) { + RTE_LOG(ERR, EAL, "XEN DOM0:failed to get memseg info.\n"); + err = -EIO; + goto fail; + } + + /* map all memory segments to contiguous user space */ + for (memseg_idx = 0; memseg_idx < num_memseg; memseg_idx++) + { + vma_len = seginfo[memseg_idx].size; + + /** + * get the biggest virtual memory area up to vma_len. If it fails, + * vma_addr is NULL, so let the kernel provide the address. + */ + vma_addr = xen_get_virtual_area(&vma_len, RTE_PGSIZE_2M); + if (vma_addr == NULL) { + flags = MAP_SHARED; + vma_len = RTE_PGSIZE_2M; + } else + flags = MAP_SHARED | MAP_FIXED; + + seginfo[memseg_idx].size = vma_len; + vir_addr = mmap(vma_addr, seginfo[memseg_idx].size, + PROT_READ|PROT_WRITE, flags, xen_fd, + memseg_idx * page_size); + if (vir_addr == MAP_FAILED) { + RTE_LOG(ERR, EAL, "XEN DOM0:Could not mmap %s\n", + DOM0_MM_DEV); + err = -EIO; + goto fail; + } + + memseg[memseg_idx].addr = vir_addr; + memseg[memseg_idx].phys_addr = page_size * + seginfo[memseg_idx].pfn ; + memseg[memseg_idx].len = seginfo[memseg_idx].size; + for ( i = 0; i < seginfo[memseg_idx].size / RTE_PGSIZE_2M; i++) + memseg[memseg_idx].mfn[i] = seginfo[memseg_idx].mfn[i]; + + /* MFNs are continuous in 2M, so assume that page size is 2M */ + memseg[memseg_idx].hugepage_sz = RTE_PGSIZE_2M; + + memseg[memseg_idx].nchannel = mcfg->nchannel; + memseg[memseg_idx].nrank = mcfg->nrank; + + /* NUMA is not suppoted in Xen Dom0, so only set socket 0*/ + memseg[memseg_idx].socket_id = 0; + } + + return 0; +fail: + if (xen_fd > 0) { + close(xen_fd); + xen_fd = -1; + } + return err; +} + +/* + * This creates the memory mappings in the secondary process to match that of + * the server process. It goes through each memory segment in the DPDK runtime + * configuration, mapping them in order to form a contiguous block in the + * virtual memory space + */ +int +rte_xen_dom0_memory_attach(void) +{ + const struct rte_mem_config *mcfg; + unsigned s = 0; /* s used to track the segment number */ + int xen_fd = -1; + int ret = -1; + void *vir_addr; + char name[DOM0_NAME_MAX] = {0}; + int page_size = getpagesize(); + + mcfg = rte_eal_get_configuration()->mem_config; + + /* Check FD and open once */ + if (xen_fd < 0) { + xen_fd = open(DOM0_MM_DEV, O_RDWR); + if (xen_fd < 0) { + RTE_LOG(ERR, EAL, "Can not open %s\n",DOM0_MM_DEV); + goto error; + } + } + + /* construct memory mangement name for Dom0 */ + snprintf(name, DOM0_NAME_MAX, "%s-%s", + internal_config.hugefile_prefix, DEFAUL_DOM0_NAME); + /* attach to memory segments of primary process */ + ret = ioctl(xen_fd, RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG, name); + if (ret) { + RTE_LOG(ERR, EAL,"attach memory segments fail.\n"); + goto error; + } + + /* map all segments into memory to make sure we get the addrs */ + for (s = 0; s < RTE_MAX_MEMSEG; ++s) { + + /* + * the first memory segment with len==0 is the one that + * follows the last valid segment. + */ + if (mcfg->memseg[s].len == 0) + break; + + vir_addr = mmap(mcfg->memseg[s].addr, mcfg->memseg[s].len, + PROT_READ|PROT_WRITE, MAP_SHARED|MAP_FIXED, xen_fd, + s * page_size); + if (vir_addr == MAP_FAILED) { + RTE_LOG(ERR, EAL, "Could not mmap %llu bytes " + "in %s to requested address [%p]\n", + (unsigned long long)mcfg->memseg[s].len, DOM0_MM_DEV, + mcfg->memseg[s].addr); + goto error; + } + } + return 0; + +error: + if (xen_fd >= 0) { + close(xen_fd); + xen_fd = -1; + } + return -1; +} diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h new file mode 100644 index 00000000..d9707780 --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_dom0_common.h @@ -0,0 +1,108 @@ +/*- + * This file is provided under a dual BSD/LGPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GNU LESSER GENERAL PUBLIC LICENSE + * + * Copyright(c) 2007-2014 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + * Contact Information: + * Intel Corporation + * + * + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef _RTE_DOM0_COMMON_H_ +#define _RTE_DOM0_COMMON_H_ + +#ifdef __KERNEL__ +#include +#endif + +#define DOM0_NAME_MAX 256 +#define DOM0_MM_DEV "/dev/dom0_mm" + +#define DOM0_CONTIG_NUM_ORDER 9 /**< order of 2M */ +#define DOM0_NUM_MEMSEG 512 /**< Maximum nb. of memory segment. */ +#define DOM0_MEMBLOCK_SIZE 0x200000 /**< size of memory block(2M). */ +#define DOM0_CONFIG_MEMSIZE 4096 /**< Maximum config memory size(4G). */ +#define DOM0_NUM_MEMBLOCK (DOM0_CONFIG_MEMSIZE / 2) /**< Maximum nb. of 2M memory block. */ + +#define RTE_DOM0_IOCTL_PREPARE_MEMSEG _IOWR(0, 1 , struct memory_info) +#define RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG _IOWR(0, 2 , char *) +#define RTE_DOM0_IOCTL_GET_NUM_MEMSEG _IOWR(0, 3, int) +#define RTE_DOM0_IOCTL_GET_MEMSEG_INFO _IOWR(0, 4, void *) + +/** + * A structure used to store memory information. + */ +struct memory_info { + char name[DOM0_NAME_MAX]; + uint64_t size; +}; + +/** + * A structure used to store memory segment information. + */ +struct memseg_info { + uint32_t idx; + uint64_t pfn; + uint64_t size; + uint64_t mfn[DOM0_NUM_MEMBLOCK]; +}; + +/** + * A structure used to store memory block information. + */ +struct memblock_info { + uint8_t exchange_flag; + uint8_t used; + uint64_t vir_addr; + uint64_t pfn; + uint64_t mfn; +}; +#endif /* _RTE_DOM0_COMMON_H_ */ diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h new file mode 100644 index 00000000..3dacbff8 --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_interrupts.h @@ -0,0 +1,228 @@ +/*- + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RTE_INTERRUPTS_H_ +#error "don't include this file directly, please include generic " +#endif + +#ifndef _RTE_LINUXAPP_INTERRUPTS_H_ +#define _RTE_LINUXAPP_INTERRUPTS_H_ + +#define RTE_MAX_RXTX_INTR_VEC_ID 32 +#define RTE_INTR_VEC_ZERO_OFFSET 0 +#define RTE_INTR_VEC_RXTX_OFFSET 1 + +enum rte_intr_handle_type { + RTE_INTR_HANDLE_UNKNOWN = 0, + RTE_INTR_HANDLE_UIO, /**< uio device handle */ + RTE_INTR_HANDLE_UIO_INTX, /**< uio generic handle */ + RTE_INTR_HANDLE_VFIO_LEGACY, /**< vfio device handle (legacy) */ + RTE_INTR_HANDLE_VFIO_MSI, /**< vfio device handle (MSI) */ + RTE_INTR_HANDLE_VFIO_MSIX, /**< vfio device handle (MSIX) */ + RTE_INTR_HANDLE_ALARM, /**< alarm handle */ + RTE_INTR_HANDLE_EXT, /**< external handler */ + RTE_INTR_HANDLE_MAX +}; + +#define RTE_INTR_EVENT_ADD 1UL +#define RTE_INTR_EVENT_DEL 2UL + +typedef void (*rte_intr_event_cb_t)(int fd, void *arg); + +struct rte_epoll_data { + uint32_t event; /**< event type */ + void *data; /**< User data */ + rte_intr_event_cb_t cb_fun; /**< IN: callback fun */ + void *cb_arg; /**< IN: callback arg */ +}; + +enum { + RTE_EPOLL_INVALID = 0, + RTE_EPOLL_VALID, + RTE_EPOLL_EXEC, +}; + +/** interrupt epoll event obj, taken by epoll_event.ptr */ +struct rte_epoll_event { + volatile uint32_t status; /**< OUT: event status */ + int fd; /**< OUT: event fd */ + int epfd; /**< OUT: epoll instance the ev associated with */ + struct rte_epoll_data epdata; +}; + +/** Handle for interrupts. */ +struct rte_intr_handle { + union { + int vfio_dev_fd; /**< VFIO device file descriptor */ + int uio_cfg_fd; /**< UIO config file descriptor + for uio_pci_generic */ + }; + int fd; /**< interrupt event file descriptor */ + enum rte_intr_handle_type type; /**< handle type */ + uint32_t max_intr; /**< max interrupt requested */ + uint32_t nb_efd; /**< number of available efd(event fd) */ + int efds[RTE_MAX_RXTX_INTR_VEC_ID]; /**< intr vectors/efds mapping */ + struct rte_epoll_event elist[RTE_MAX_RXTX_INTR_VEC_ID]; + /**< intr vector epoll event */ + int *intr_vec; /**< intr vector number array */ +}; + +#define RTE_EPOLL_PER_THREAD -1 /**< to hint using per thread epfd */ + +/** + * It waits for events on the epoll instance. + * + * @param epfd + * Epoll instance fd on which the caller wait for events. + * @param events + * Memory area contains the events that will be available for the caller. + * @param maxevents + * Up to maxevents are returned, must greater than zero. + * @param timeout + * Specifying a timeout of -1 causes a block indefinitely. + * Specifying a timeout equal to zero cause to return immediately. + * @return + * - On success, returns the number of available event. + * - On failure, a negative value. + */ +int +rte_epoll_wait(int epfd, struct rte_epoll_event *events, + int maxevents, int timeout); + +/** + * It performs control operations on epoll instance referred by the epfd. + * It requests that the operation op be performed for the target fd. + * + * @param epfd + * Epoll instance fd on which the caller perform control operations. + * @param op + * The operation be performed for the target fd. + * @param fd + * The target fd on which the control ops perform. + * @param event + * Describes the object linked to the fd. + * Note: The caller must take care the object deletion after CTL_DEL. + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int +rte_epoll_ctl(int epfd, int op, int fd, + struct rte_epoll_event *event); + +/** + * The function returns the per thread epoll instance. + * + * @return + * epfd the epoll instance referred to. + */ +int +rte_intr_tls_epfd(void); + +/** + * @param intr_handle + * Pointer to the interrupt handle. + * @param epfd + * Epoll instance fd which the intr vector associated to. + * @param op + * The operation be performed for the vector. + * Operation type of {ADD, DEL}. + * @param vec + * RX intr vector number added to the epoll instance wait list. + * @param data + * User raw data. + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int +rte_intr_rx_ctl(struct rte_intr_handle *intr_handle, + int epfd, int op, unsigned int vec, void *data); + +/** + * It enables the packet I/O interrupt event if it's necessary. + * It creates event fd for each interrupt vector when MSIX is used, + * otherwise it multiplexes a single event fd. + * + * @param intr_handle + * Pointer to the interrupt handle. + * @param nb_efd + * Number of interrupt vector trying to enable. + * The value 0 is not allowed. + * @return + * - On success, zero. + * - On failure, a negative value. + */ +int +rte_intr_efd_enable(struct rte_intr_handle *intr_handle, uint32_t nb_efd); + +/** + * It disables the packet I/O interrupt event. + * It deletes registered eventfds and closes the open fds. + * + * @param intr_handle + * Pointer to the interrupt handle. + */ +void +rte_intr_efd_disable(struct rte_intr_handle *intr_handle); + +/** + * The packet I/O interrupt on datapath is enabled or not. + * + * @param intr_handle + * Pointer to the interrupt handle. + */ +int +rte_intr_dp_is_en(struct rte_intr_handle *intr_handle); + +/** + * The interrupt handle instance allows other causes or not. + * Other causes stand for any none packet I/O interrupts. + * + * @param intr_handle + * Pointer to the interrupt handle. + */ +int +rte_intr_allow_others(struct rte_intr_handle *intr_handle); + +/** + * The multiple interrupt vector capability of interrupt handle instance. + * It returns zero if no multiple interrupt vector support. + * + * @param intr_handle + * Pointer to the interrupt handle. + */ +int +rte_intr_cap_multiple(struct rte_intr_handle *intr_handle); + +#endif /* _RTE_LINUXAPP_INTERRUPTS_H_ */ diff --git a/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h new file mode 100644 index 00000000..7e5e5984 --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/include/exec-env/rte_kni_common.h @@ -0,0 +1,172 @@ +/*- + * This file is provided under a dual BSD/LGPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GNU LESSER GENERAL PUBLIC LICENSE + * + * Copyright(c) 2007-2014 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2.1 of the GNU Lesser General Public License + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * + * Contact Information: + * Intel Corporation + * + * + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#ifndef _RTE_KNI_COMMON_H_ +#define _RTE_KNI_COMMON_H_ + +#ifdef __KERNEL__ +#include +#endif + +/** + * KNI name is part of memzone name. + */ +#define RTE_KNI_NAMESIZE 32 + +#define RTE_CACHE_LINE_MIN_SIZE 64 + +/* + * Request id. + */ +enum rte_kni_req_id { + RTE_KNI_REQ_UNKNOWN = 0, + RTE_KNI_REQ_CHANGE_MTU, + RTE_KNI_REQ_CFG_NETWORK_IF, + RTE_KNI_REQ_MAX, +}; + +/* + * Structure for KNI request. + */ +struct rte_kni_request { + uint32_t req_id; /**< Request id */ + union { + uint32_t new_mtu; /**< New MTU */ + uint8_t if_up; /**< 1: interface up, 0: interface down */ + }; + int32_t result; /**< Result for processing request */ +} __attribute__((__packed__)); + +/* + * Fifo struct mapped in a shared memory. It describes a circular buffer FIFO + * Write and read should wrap around. Fifo is empty when write == read + * Writing should never overwrite the read position + */ +struct rte_kni_fifo { + volatile unsigned write; /**< Next position to be written*/ + volatile unsigned read; /**< Next position to be read */ + unsigned len; /**< Circular buffer length */ + unsigned elem_size; /**< Pointer size - for 32/64 bit OS */ + void * volatile buffer[0]; /**< The buffer contains mbuf pointers */ +}; + +/* + * The kernel image of the rte_mbuf struct, with only the relevant fields. + * Padding is necessary to assure the offsets of these fields + */ +struct rte_kni_mbuf { + void *buf_addr __attribute__((__aligned__(RTE_CACHE_LINE_SIZE))); + char pad0[10]; + uint16_t data_off; /**< Start address of data in segment buffer. */ + char pad1[4]; + uint64_t ol_flags; /**< Offload features. */ + char pad2[4]; + uint32_t pkt_len; /**< Total pkt len: sum of all segment data_len. */ + uint16_t data_len; /**< Amount of data in segment buffer. */ + + /* fields on second cache line */ + char pad3[8] __attribute__((__aligned__(RTE_CACHE_LINE_MIN_SIZE))); + void *pool; + void *next; +}; + +/* + * Struct used to create a KNI device. Passed to the kernel in IOCTL call + */ + +struct rte_kni_device_info { + char name[RTE_KNI_NAMESIZE]; /**< Network device name for KNI */ + + phys_addr_t tx_phys; + phys_addr_t rx_phys; + phys_addr_t alloc_phys; + phys_addr_t free_phys; + + /* Used by Ethtool */ + phys_addr_t req_phys; + phys_addr_t resp_phys; + phys_addr_t sync_phys; + void * sync_va; + + /* mbuf mempool */ + void * mbuf_va; + phys_addr_t mbuf_phys; + + /* PCI info */ + uint16_t vendor_id; /**< Vendor ID or PCI_ANY_ID. */ + uint16_t device_id; /**< Device ID or PCI_ANY_ID. */ + uint8_t bus; /**< Device bus */ + uint8_t devid; /**< Device ID */ + uint8_t function; /**< Device function. */ + + uint16_t group_id; /**< Group ID */ + uint32_t core_id; /**< core ID to bind for kernel thread */ + + uint8_t force_bind : 1; /**< Flag for kernel thread binding */ + + /* mbuf size */ + unsigned mbuf_size; +}; + +#define KNI_DEVICE "kni" + +#define RTE_KNI_IOCTL_TEST _IOWR(0, 1, int) +#define RTE_KNI_IOCTL_CREATE _IOWR(0, 2, struct rte_kni_device_info) +#define RTE_KNI_IOCTL_RELEASE _IOWR(0, 3, struct rte_kni_device_info) + +#endif /* _RTE_KNI_COMMON_H_ */ diff --git a/lib/librte_eal/linuxapp/eal/rte_eal_version.map b/lib/librte_eal/linuxapp/eal/rte_eal_version.map new file mode 100644 index 00000000..12503efa --- /dev/null +++ b/lib/librte_eal/linuxapp/eal/rte_eal_version.map @@ -0,0 +1,156 @@ +DPDK_2.0 { + global: + + __rte_panic; + devargs_list; + eal_parse_sysfs_value; + eal_timer_source; + lcore_config; + pci_device_list; + pci_driver_list; + per_lcore__lcore_id; + per_lcore__rte_errno; + rte_calloc; + rte_calloc_socket; + rte_cpu_check_supported; + rte_cpu_get_flag_enabled; + rte_cycles_vmware_tsc_map; + rte_delay_us; + rte_dump_physmem_layout; + rte_dump_registers; + rte_dump_stack; + rte_dump_tailq; + rte_eal_alarm_cancel; + rte_eal_alarm_set; + rte_eal_dev_init; + rte_eal_devargs_add; + rte_eal_devargs_dump; + rte_eal_devargs_type_count; + rte_eal_driver_register; + rte_eal_driver_unregister; + rte_eal_get_configuration; + rte_eal_get_lcore_state; + rte_eal_get_physmem_layout; + rte_eal_get_physmem_size; + rte_eal_has_hugepages; + rte_eal_hpet_init; + rte_eal_init; + rte_eal_iopl_init; + rte_eal_lcore_role; + rte_eal_mp_remote_launch; + rte_eal_mp_wait_lcore; + rte_eal_parse_devargs_str; + rte_eal_pci_dump; + rte_eal_pci_probe; + rte_eal_pci_probe_one; + rte_eal_pci_register; + rte_eal_pci_scan; + rte_eal_pci_unregister; + rte_eal_process_type; + rte_eal_remote_launch; + rte_eal_tailq_lookup; + rte_eal_tailq_register; + rte_eal_vdev_init; + rte_eal_vdev_uninit; + rte_eal_wait_lcore; + rte_exit; + rte_free; + rte_get_hpet_cycles; + rte_get_hpet_hz; + rte_get_log_level; + rte_get_log_type; + rte_get_tsc_hz; + rte_hexdump; + rte_intr_callback_register; + rte_intr_callback_unregister; + rte_intr_disable; + rte_intr_enable; + rte_log; + rte_log_add_in_history; + rte_log_cur_msg_loglevel; + rte_log_cur_msg_logtype; + rte_log_dump_history; + rte_log_set_history; + rte_logs; + rte_malloc; + rte_malloc_dump_stats; + rte_malloc_get_socket_stats; + rte_malloc_set_limit; + rte_malloc_socket; + rte_malloc_validate; + rte_malloc_virt2phy; + rte_mem_lock_page; + rte_mem_phy2mch; + rte_mem_virt2phy; + rte_memdump; + rte_memory_get_nchannel; + rte_memory_get_nrank; + rte_memzone_dump; + rte_memzone_lookup; + rte_memzone_reserve; + rte_memzone_reserve_aligned; + rte_memzone_reserve_bounded; + rte_memzone_walk; + rte_openlog_stream; + rte_realloc; + rte_set_application_usage_hook; + rte_set_log_level; + rte_set_log_type; + rte_socket_id; + rte_strerror; + rte_strsplit; + rte_sys_gettid; + rte_thread_get_affinity; + rte_thread_set_affinity; + rte_vlog; + rte_xen_dom0_memory_attach; + rte_xen_dom0_memory_init; + rte_zmalloc; + rte_zmalloc_socket; + + local: *; +}; + +DPDK_2.1 { + global: + + rte_eal_pci_detach; + rte_eal_pci_read_config; + rte_eal_pci_write_config; + rte_epoll_ctl; + rte_epoll_wait; + rte_intr_allow_others; + rte_intr_dp_is_en; + rte_intr_efd_disable; + rte_intr_efd_enable; + rte_intr_rx_ctl; + rte_intr_tls_epfd; + rte_memzone_free; + +} DPDK_2.0; + +DPDK_2.2 { + global: + + rte_intr_cap_multiple; + rte_keepalive_create; + rte_keepalive_dispatch_pings; + rte_keepalive_mark_alive; + rte_keepalive_register_core; + rte_xen_dom0_supported; + +} DPDK_2.1; + +DPDK_16.04 { + global: + + rte_cpu_get_flag_name; + rte_eal_pci_ioport_map; + rte_eal_pci_ioport_read; + rte_eal_pci_ioport_unmap; + rte_eal_pci_ioport_write; + rte_eal_pci_map_device; + rte_eal_pci_unmap_device; + rte_eal_primary_proc_alive; + +} DPDK_2.2; diff --git a/lib/librte_eal/linuxapp/igb_uio/Makefile b/lib/librte_eal/linuxapp/igb_uio/Makefile new file mode 100644 index 00000000..ec6e702f --- /dev/null +++ b/lib/librte_eal/linuxapp/igb_uio/Makefile @@ -0,0 +1,53 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# +# module name and path +# +MODULE = igb_uio +MODULE_PATH = drivers/net/igb_uio + +# +# CFLAGS +# +MODULE_CFLAGS += -I$(SRCDIR) --param max-inline-insns-single=100 +MODULE_CFLAGS += -I$(RTE_OUTPUT)/include +MODULE_CFLAGS += -Winline -Wall -Werror +MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h + +# +# all source are stored in SRCS-y +# +SRCS-y := igb_uio.c + +include $(RTE_SDK)/mk/rte.module.mk diff --git a/lib/librte_eal/linuxapp/igb_uio/compat.h b/lib/librte_eal/linuxapp/igb_uio/compat.h new file mode 100644 index 00000000..c1d45a66 --- /dev/null +++ b/lib/librte_eal/linuxapp/igb_uio/compat.h @@ -0,0 +1,116 @@ +/* + * Minimal wrappers to allow compiling igb_uio on older kernels. + */ + +#ifndef RHEL_RELEASE_VERSION +#define RHEL_RELEASE_VERSION(a, b) (((a) << 8) + (b)) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0) +#define pci_cfg_access_lock pci_block_user_cfg_access +#define pci_cfg_access_unlock pci_unblock_user_cfg_access +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 18, 0) +#define HAVE_PTE_MASK_PAGE_IOMAP +#endif + +#ifndef PCI_MSIX_ENTRY_SIZE +#define PCI_MSIX_ENTRY_SIZE 16 +#define PCI_MSIX_ENTRY_LOWER_ADDR 0 +#define PCI_MSIX_ENTRY_UPPER_ADDR 4 +#define PCI_MSIX_ENTRY_DATA 8 +#define PCI_MSIX_ENTRY_VECTOR_CTRL 12 +#define PCI_MSIX_ENTRY_CTRL_MASKBIT 1 +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 34) && \ + (!(defined(RHEL_RELEASE_CODE) && \ + RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5, 9))) + +static int pci_num_vf(struct pci_dev *dev) +{ + struct iov { + int pos; + int nres; + u32 cap; + u16 ctrl; + u16 total; + u16 initial; + u16 nr_virtfn; + } *iov = (struct iov *)dev->sriov; + + if (!dev->is_physfn) + return 0; + + return iov->nr_virtfn; +} + +#endif /* < 2.6.34 */ + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39) && \ + (!(defined(RHEL_RELEASE_CODE) && \ + RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 4))) + +#define kstrtoul strict_strtoul + +#endif /* < 2.6.39 */ + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 3, 0) && \ + (!(defined(RHEL_RELEASE_CODE) && \ + RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 3))) + +/* Check if INTX works to control irq's. + * Set's INTX_DISABLE flag and reads it back + */ +static bool pci_intx_mask_supported(struct pci_dev *pdev) +{ + bool mask_supported = false; + uint16_t orig, new; + + pci_block_user_cfg_access(pdev); + pci_read_config_word(pdev, PCI_COMMAND, &orig); + pci_write_config_word(pdev, PCI_COMMAND, + orig ^ PCI_COMMAND_INTX_DISABLE); + pci_read_config_word(pdev, PCI_COMMAND, &new); + + if ((new ^ orig) & ~PCI_COMMAND_INTX_DISABLE) { + dev_err(&pdev->dev, "Command register changed from " + "0x%x to 0x%x: driver or hardware bug?\n", orig, new); + } else if ((new ^ orig) & PCI_COMMAND_INTX_DISABLE) { + mask_supported = true; + pci_write_config_word(pdev, PCI_COMMAND, orig); + } + pci_unblock_user_cfg_access(pdev); + + return mask_supported; +} + +static bool pci_check_and_mask_intx(struct pci_dev *pdev) +{ + bool pending; + uint32_t status; + + pci_block_user_cfg_access(pdev); + pci_read_config_dword(pdev, PCI_COMMAND, &status); + + /* interrupt is not ours, goes to out */ + pending = (((status >> 16) & PCI_STATUS_INTERRUPT) != 0); + if (pending) { + uint16_t old, new; + + old = status; + if (status != 0) + new = old & (~PCI_COMMAND_INTX_DISABLE); + else + new = old | PCI_COMMAND_INTX_DISABLE; + + if (old != new) + pci_write_config_word(pdev, PCI_COMMAND, new); + } + pci_unblock_user_cfg_access(pdev); + + return pending; +} + +#endif /* < 3.3.0 */ diff --git a/lib/librte_eal/linuxapp/igb_uio/igb_uio.c b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c new file mode 100644 index 00000000..72b26923 --- /dev/null +++ b/lib/librte_eal/linuxapp/igb_uio/igb_uio.c @@ -0,0 +1,580 @@ +/*- + * GPL LICENSE SUMMARY + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * The full GNU General Public License is included in this distribution + * in the file called LICENSE.GPL. + * + * Contact Information: + * Intel Corporation + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef CONFIG_XEN_DOM0 +#include +#endif +#include + +#include "compat.h" + +/** + * A structure describing the private information for a uio device. + */ +struct rte_uio_pci_dev { + struct uio_info info; + struct pci_dev *pdev; + enum rte_intr_mode mode; +}; + +static char *intr_mode; +static enum rte_intr_mode igbuio_intr_mode_preferred = RTE_INTR_MODE_MSIX; + +/* sriov sysfs */ +static ssize_t +show_max_vfs(struct device *dev, struct device_attribute *attr, + char *buf) +{ + return snprintf(buf, 10, "%u\n", dev_num_vf(dev)); +} + +static ssize_t +store_max_vfs(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + int err = 0; + unsigned long max_vfs; + struct pci_dev *pdev = to_pci_dev(dev); + + if (0 != kstrtoul(buf, 0, &max_vfs)) + return -EINVAL; + + if (0 == max_vfs) + pci_disable_sriov(pdev); + else if (0 == pci_num_vf(pdev)) + err = pci_enable_sriov(pdev, max_vfs); + else /* do nothing if change max_vfs number */ + err = -EINVAL; + + return err ? err : count; +} + +#ifdef RTE_PCI_CONFIG +static ssize_t +show_extended_tag(struct device *dev, struct device_attribute *attr, char *buf) +{ + dev_info(dev, "Deprecated\n"); + + return 0; +} + +static ssize_t +store_extended_tag(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + dev_info(dev, "Deprecated\n"); + + return 0; +} + +static ssize_t +show_max_read_request_size(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + dev_info(dev, "Deprecated\n"); + + return 0; +} + +static ssize_t +store_max_read_request_size(struct device *dev, + struct device_attribute *attr, + const char *buf, + size_t count) +{ + dev_info(dev, "Deprecated\n"); + + return 0; +} +#endif + +static DEVICE_ATTR(max_vfs, S_IRUGO | S_IWUSR, show_max_vfs, store_max_vfs); +#ifdef RTE_PCI_CONFIG +static DEVICE_ATTR(extended_tag, S_IRUGO | S_IWUSR, show_extended_tag, + store_extended_tag); +static DEVICE_ATTR(max_read_request_size, S_IRUGO | S_IWUSR, + show_max_read_request_size, store_max_read_request_size); +#endif + +static struct attribute *dev_attrs[] = { + &dev_attr_max_vfs.attr, +#ifdef RTE_PCI_CONFIG + &dev_attr_extended_tag.attr, + &dev_attr_max_read_request_size.attr, +#endif + NULL, +}; + +static const struct attribute_group dev_attr_grp = { + .attrs = dev_attrs, +}; +/* + * It masks the msix on/off of generating MSI-X messages. + */ +static void +igbuio_msix_mask_irq(struct msi_desc *desc, int32_t state) +{ + u32 mask_bits = desc->masked; + unsigned offset = desc->msi_attrib.entry_nr * PCI_MSIX_ENTRY_SIZE + + PCI_MSIX_ENTRY_VECTOR_CTRL; + + if (state != 0) + mask_bits &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT; + else + mask_bits |= PCI_MSIX_ENTRY_CTRL_MASKBIT; + + if (mask_bits != desc->masked) { + writel(mask_bits, desc->mask_base + offset); + readl(desc->mask_base); + desc->masked = mask_bits; + } +} + +/** + * This is the irqcontrol callback to be registered to uio_info. + * It can be used to disable/enable interrupt from user space processes. + * + * @param info + * pointer to uio_info. + * @param irq_state + * state value. 1 to enable interrupt, 0 to disable interrupt. + * + * @return + * - On success, 0. + * - On failure, a negative value. + */ +static int +igbuio_pci_irqcontrol(struct uio_info *info, s32 irq_state) +{ + struct rte_uio_pci_dev *udev = info->priv; + struct pci_dev *pdev = udev->pdev; + + pci_cfg_access_lock(pdev); + if (udev->mode == RTE_INTR_MODE_LEGACY) + pci_intx(pdev, !!irq_state); + + else if (udev->mode == RTE_INTR_MODE_MSIX) { + struct msi_desc *desc; + +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 3, 0)) + list_for_each_entry(desc, &pdev->msi_list, list) + igbuio_msix_mask_irq(desc, irq_state); +#else + list_for_each_entry(desc, &pdev->dev.msi_list, list) + igbuio_msix_mask_irq(desc, irq_state); +#endif + } + pci_cfg_access_unlock(pdev); + + return 0; +} + +/** + * This is interrupt handler which will check if the interrupt is for the right device. + * If yes, disable it here and will be enable later. + */ +static irqreturn_t +igbuio_pci_irqhandler(int irq, struct uio_info *info) +{ + struct rte_uio_pci_dev *udev = info->priv; + + /* Legacy mode need to mask in hardware */ + if (udev->mode == RTE_INTR_MODE_LEGACY && + !pci_check_and_mask_intx(udev->pdev)) + return IRQ_NONE; + + /* Message signal mode, no share IRQ and automasked */ + return IRQ_HANDLED; +} + +#ifdef CONFIG_XEN_DOM0 +static int +igbuio_dom0_mmap_phys(struct uio_info *info, struct vm_area_struct *vma) +{ + int idx; + + idx = (int)vma->vm_pgoff; + vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); +#ifdef HAVE_PTE_MASK_PAGE_IOMAP + vma->vm_page_prot.pgprot |= _PAGE_IOMAP; +#endif + + return remap_pfn_range(vma, + vma->vm_start, + info->mem[idx].addr >> PAGE_SHIFT, + vma->vm_end - vma->vm_start, + vma->vm_page_prot); +} + +/** + * This is uio device mmap method which will use igbuio mmap for Xen + * Dom0 environment. + */ +static int +igbuio_dom0_pci_mmap(struct uio_info *info, struct vm_area_struct *vma) +{ + int idx; + + if (vma->vm_pgoff >= MAX_UIO_MAPS) + return -EINVAL; + + if (info->mem[vma->vm_pgoff].size == 0) + return -EINVAL; + + idx = (int)vma->vm_pgoff; + switch (info->mem[idx].memtype) { + case UIO_MEM_PHYS: + return igbuio_dom0_mmap_phys(info, vma); + case UIO_MEM_LOGICAL: + case UIO_MEM_VIRTUAL: + default: + return -EINVAL; + } +} +#endif + +/* Remap pci resources described by bar #pci_bar in uio resource n. */ +static int +igbuio_pci_setup_iomem(struct pci_dev *dev, struct uio_info *info, + int n, int pci_bar, const char *name) +{ + unsigned long addr, len; + void *internal_addr; + + if (n >= ARRAY_SIZE(info->mem)) + return -EINVAL; + + addr = pci_resource_start(dev, pci_bar); + len = pci_resource_len(dev, pci_bar); + if (addr == 0 || len == 0) + return -1; + internal_addr = ioremap(addr, len); + if (internal_addr == NULL) + return -1; + info->mem[n].name = name; + info->mem[n].addr = addr; + info->mem[n].internal_addr = internal_addr; + info->mem[n].size = len; + info->mem[n].memtype = UIO_MEM_PHYS; + return 0; +} + +/* Get pci port io resources described by bar #pci_bar in uio resource n. */ +static int +igbuio_pci_setup_ioport(struct pci_dev *dev, struct uio_info *info, + int n, int pci_bar, const char *name) +{ + unsigned long addr, len; + + if (n >= ARRAY_SIZE(info->port)) + return -EINVAL; + + addr = pci_resource_start(dev, pci_bar); + len = pci_resource_len(dev, pci_bar); + if (addr == 0 || len == 0) + return -EINVAL; + + info->port[n].name = name; + info->port[n].start = addr; + info->port[n].size = len; + info->port[n].porttype = UIO_PORT_X86; + + return 0; +} + +/* Unmap previously ioremap'd resources */ +static void +igbuio_pci_release_iomem(struct uio_info *info) +{ + int i; + + for (i = 0; i < MAX_UIO_MAPS; i++) { + if (info->mem[i].internal_addr) + iounmap(info->mem[i].internal_addr); + } +} + +static int +igbuio_setup_bars(struct pci_dev *dev, struct uio_info *info) +{ + int i, iom, iop, ret; + unsigned long flags; + static const char *bar_names[PCI_STD_RESOURCE_END + 1] = { + "BAR0", + "BAR1", + "BAR2", + "BAR3", + "BAR4", + "BAR5", + }; + + iom = 0; + iop = 0; + + for (i = 0; i < ARRAY_SIZE(bar_names); i++) { + if (pci_resource_len(dev, i) != 0 && + pci_resource_start(dev, i) != 0) { + flags = pci_resource_flags(dev, i); + if (flags & IORESOURCE_MEM) { + ret = igbuio_pci_setup_iomem(dev, info, iom, + i, bar_names[i]); + if (ret != 0) + return ret; + iom++; + } else if (flags & IORESOURCE_IO) { + ret = igbuio_pci_setup_ioport(dev, info, iop, + i, bar_names[i]); + if (ret != 0) + return ret; + iop++; + } + } + } + + return (iom != 0) ? ret : -ENOENT; +} + +#if LINUX_VERSION_CODE < KERNEL_VERSION(3, 8, 0) +static int __devinit +#else +static int +#endif +igbuio_pci_probe(struct pci_dev *dev, const struct pci_device_id *id) +{ + struct rte_uio_pci_dev *udev; + struct msix_entry msix_entry; + int err; + + udev = kzalloc(sizeof(struct rte_uio_pci_dev), GFP_KERNEL); + if (!udev) + return -ENOMEM; + + /* + * enable device: ask low-level code to enable I/O and + * memory + */ + err = pci_enable_device(dev); + if (err != 0) { + dev_err(&dev->dev, "Cannot enable PCI device\n"); + goto fail_free; + } + + /* + * reserve device's PCI memory regions for use by this + * module + */ + err = pci_request_regions(dev, "igb_uio"); + if (err != 0) { + dev_err(&dev->dev, "Cannot request regions\n"); + goto fail_disable; + } + + /* enable bus mastering on the device */ + pci_set_master(dev); + + /* remap IO memory */ + err = igbuio_setup_bars(dev, &udev->info); + if (err != 0) + goto fail_release_iomem; + + /* set 64-bit DMA mask */ + err = pci_set_dma_mask(dev, DMA_BIT_MASK(64)); + if (err != 0) { + dev_err(&dev->dev, "Cannot set DMA mask\n"); + goto fail_release_iomem; + } + + err = pci_set_consistent_dma_mask(dev, DMA_BIT_MASK(64)); + if (err != 0) { + dev_err(&dev->dev, "Cannot set consistent DMA mask\n"); + goto fail_release_iomem; + } + + /* fill uio infos */ + udev->info.name = "igb_uio"; + udev->info.version = "0.1"; + udev->info.handler = igbuio_pci_irqhandler; + udev->info.irqcontrol = igbuio_pci_irqcontrol; +#ifdef CONFIG_XEN_DOM0 + /* check if the driver run on Xen Dom0 */ + if (xen_initial_domain()) + udev->info.mmap = igbuio_dom0_pci_mmap; +#endif + udev->info.priv = udev; + udev->pdev = dev; + + switch (igbuio_intr_mode_preferred) { + case RTE_INTR_MODE_MSIX: + /* Only 1 msi-x vector needed */ + msix_entry.entry = 0; + if (pci_enable_msix(dev, &msix_entry, 1) == 0) { + dev_dbg(&dev->dev, "using MSI-X"); + udev->info.irq = msix_entry.vector; + udev->mode = RTE_INTR_MODE_MSIX; + break; + } + /* fall back to INTX */ + case RTE_INTR_MODE_LEGACY: + if (pci_intx_mask_supported(dev)) { + dev_dbg(&dev->dev, "using INTX"); + udev->info.irq_flags = IRQF_SHARED; + udev->info.irq = dev->irq; + udev->mode = RTE_INTR_MODE_LEGACY; + break; + } + dev_notice(&dev->dev, "PCI INTX mask not supported\n"); + /* fall back to no IRQ */ + case RTE_INTR_MODE_NONE: + udev->mode = RTE_INTR_MODE_NONE; + udev->info.irq = 0; + break; + + default: + dev_err(&dev->dev, "invalid IRQ mode %u", + igbuio_intr_mode_preferred); + err = -EINVAL; + goto fail_release_iomem; + } + + err = sysfs_create_group(&dev->dev.kobj, &dev_attr_grp); + if (err != 0) + goto fail_release_iomem; + + /* register uio driver */ + err = uio_register_device(&dev->dev, &udev->info); + if (err != 0) + goto fail_remove_group; + + pci_set_drvdata(dev, udev); + + dev_info(&dev->dev, "uio device registered with irq %lx\n", + udev->info.irq); + + return 0; + +fail_remove_group: + sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp); +fail_release_iomem: + igbuio_pci_release_iomem(&udev->info); + if (udev->mode == RTE_INTR_MODE_MSIX) + pci_disable_msix(udev->pdev); + pci_release_regions(dev); +fail_disable: + pci_disable_device(dev); +fail_free: + kfree(udev); + + return err; +} + +static void +igbuio_pci_remove(struct pci_dev *dev) +{ + struct rte_uio_pci_dev *udev = pci_get_drvdata(dev); + + sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp); + uio_unregister_device(&udev->info); + igbuio_pci_release_iomem(&udev->info); + if (udev->mode == RTE_INTR_MODE_MSIX) + pci_disable_msix(dev); + pci_release_regions(dev); + pci_disable_device(dev); + pci_set_drvdata(dev, NULL); + kfree(udev); +} + +static int +igbuio_config_intr_mode(char *intr_str) +{ + if (!intr_str) { + pr_info("Use MSIX interrupt by default\n"); + return 0; + } + + if (!strcmp(intr_str, RTE_INTR_MODE_MSIX_NAME)) { + igbuio_intr_mode_preferred = RTE_INTR_MODE_MSIX; + pr_info("Use MSIX interrupt\n"); + } else if (!strcmp(intr_str, RTE_INTR_MODE_LEGACY_NAME)) { + igbuio_intr_mode_preferred = RTE_INTR_MODE_LEGACY; + pr_info("Use legacy interrupt\n"); + } else { + pr_info("Error: bad parameter - %s\n", intr_str); + return -EINVAL; + } + + return 0; +} + +static struct pci_driver igbuio_pci_driver = { + .name = "igb_uio", + .id_table = NULL, + .probe = igbuio_pci_probe, + .remove = igbuio_pci_remove, +}; + +static int __init +igbuio_pci_init_module(void) +{ + int ret; + + ret = igbuio_config_intr_mode(intr_mode); + if (ret < 0) + return ret; + + return pci_register_driver(&igbuio_pci_driver); +} + +static void __exit +igbuio_pci_exit_module(void) +{ + pci_unregister_driver(&igbuio_pci_driver); +} + +module_init(igbuio_pci_init_module); +module_exit(igbuio_pci_exit_module); + +module_param(intr_mode, charp, S_IRUGO); +MODULE_PARM_DESC(intr_mode, +"igb_uio interrupt mode (default=msix):\n" +" " RTE_INTR_MODE_MSIX_NAME " Use MSIX interrupt\n" +" " RTE_INTR_MODE_LEGACY_NAME " Use Legacy interrupt\n" +"\n"); + +MODULE_DESCRIPTION("UIO driver for Intel IGB PCI cards"); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Intel Corporation"); diff --git a/lib/librte_eal/linuxapp/kni/Makefile b/lib/librte_eal/linuxapp/kni/Makefile new file mode 100644 index 00000000..ac99d3f1 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/Makefile @@ -0,0 +1,93 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# +# module name and path +# +MODULE = rte_kni + +# +# CFLAGS +# +MODULE_CFLAGS += -I$(SRCDIR) --param max-inline-insns-single=50 +MODULE_CFLAGS += -I$(RTE_OUTPUT)/include -I$(SRCDIR)/ethtool/ixgbe -I$(SRCDIR)/ethtool/igb +MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h +MODULE_CFLAGS += -Wall -Werror + +ifeq ($(shell lsb_release -si 2>/dev/null),Ubuntu) +MODULE_CFLAGS += -DUBUNTU_RELEASE_CODE=$(shell lsb_release -sr | tr -d .) +UBUNTU_KERNEL_CODE := $(shell echo `grep UTS_RELEASE $(RTE_KERNELDIR)/include/generated/utsrelease.h \ + | cut -d '"' -f2 | cut -d- -f1,2 | tr .- $(comma)`,1) +MODULE_CFLAGS += -D"UBUNTU_KERNEL_CODE=UBUNTU_KERNEL_VERSION($(UBUNTU_KERNEL_CODE))" +endif + +# this lib needs main eal +DEPDIRS-y += lib/librte_eal/linuxapp/eal + +# +# all source are stored in SRCS-y +# +SRCS-y := ethtool/ixgbe/ixgbe_main.c +SRCS-y += ethtool/ixgbe/ixgbe_api.c +SRCS-y += ethtool/ixgbe/ixgbe_common.c +SRCS-y += ethtool/ixgbe/ixgbe_ethtool.c +SRCS-y += ethtool/ixgbe/ixgbe_82599.c +SRCS-y += ethtool/ixgbe/ixgbe_82598.c +SRCS-y += ethtool/ixgbe/ixgbe_x540.c +SRCS-y += ethtool/ixgbe/ixgbe_phy.c +SRCS-y += ethtool/ixgbe/kcompat.c + +SRCS-y += ethtool/igb/e1000_82575.c +SRCS-y += ethtool/igb/e1000_i210.c +SRCS-y += ethtool/igb/e1000_api.c +SRCS-y += ethtool/igb/e1000_mac.c +SRCS-y += ethtool/igb/e1000_manage.c +SRCS-y += ethtool/igb/e1000_mbx.c +SRCS-y += ethtool/igb/e1000_nvm.c +SRCS-y += ethtool/igb/e1000_phy.c +SRCS-y += ethtool/igb/igb_ethtool.c +SRCS-y += ethtool/igb/igb_hwmon.c +SRCS-y += ethtool/igb/igb_main.c +SRCS-y += ethtool/igb/igb_debugfs.c +SRCS-y += ethtool/igb/igb_param.c +SRCS-y += ethtool/igb/igb_procfs.c +SRCS-y += ethtool/igb/igb_vmdq.c +#SRCS-y += ethtool/igb/igb_ptp.c +#SRCS-y += ethtool/igb/kcompat.c + +SRCS-y += kni_misc.c +SRCS-y += kni_net.c +SRCS-y += kni_ethtool.c +SRCS-$(CONFIG_RTE_KNI_VHOST) += kni_vhost.c + +include $(RTE_SDK)/mk/rte.module.mk diff --git a/lib/librte_eal/linuxapp/kni/compat.h b/lib/librte_eal/linuxapp/kni/compat.h new file mode 100644 index 00000000..cf100b67 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/compat.h @@ -0,0 +1,29 @@ +/* + * Minimal wrappers to allow compiling kni on older kernels. + */ + +#ifndef RHEL_RELEASE_VERSION +#define RHEL_RELEASE_VERSION(a, b) (((a) << 8) + (b)) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39) && \ + (!(defined(RHEL_RELEASE_CODE) && \ + RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 4))) + +#define kstrtoul strict_strtoul + +#endif /* < 2.6.39 */ + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35) + +#define sk_sleep(s) (s)->sk_sleep + +#endif /* < 2.6.35 */ + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0) +#define HAVE_IOV_ITER_MSGHDR +#endif + +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(4,1,0) ) +#define HAVE_KIOCB_MSG_PARAM +#endif /* < 4.1.0 */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/README b/lib/librte_eal/linuxapp/kni/ethtool/README new file mode 100644 index 00000000..2cfefe72 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/README @@ -0,0 +1,100 @@ +.. + BSD LICENSE + + Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in + the documentation and/or other materials provided with the + distribution. + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Description + +In order to support ethtool in Kernel NIC Interface, the standard Linux kernel +drivers of ixgbe/igb are needed to be reused here. ixgbe-3.9.17 is the version +modified from in kernel NIC interface kernel module to support ixgbe NIC, and +igb-3.4.8 is the version modified from in kernel NIC interface kernel module to +support igb NIC. + +The source code package of ixgbe can be downloaded from sourceforge.net as below. +http://sourceforge.net/projects/e1000/files/ixgbe%20stable/ +Below source files are copied or modified from ixgbe. + +ixgbe_82598.h +ixgbe_82599.c +ixgbe_82599.h +ixgbe_api.c +ixgbe_api.h +ixgbe_common.c +ixgbe_common.h +ixgbe_dcb.h +ixgbe_ethtool.c +ixgbe_fcoe.h +ixgbe.h +ixgbe_main.c +ixgbe_mbx.h +ixgbe_osdep.h +ixgbe_phy.c +ixgbe_phy.h +ixgbe_sriov.h +ixgbe_type.h +kcompat.c +kcompat.h + +The source code package of igb can be downloaded from sourceforge.net as below. +http://sourceforge.net/projects/e1000/files/igb%20stable/ +Below source files are copied or modified from igb. + +e1000_82575.c +e1000_82575.h +e1000_api.c +e1000_api.h +e1000_defines.h +e1000_hw.h +e1000_mac.c +e1000_mac.h +e1000_manage.c +e1000_manage.h +e1000_mbx.c +e1000_mbx.h +e1000_nvm.c +e1000_nvm.h +e1000_osdep.h +e1000_phy.c +e1000_phy.h +e1000_regs.h +igb_ethtool.c +igb.h +igb_main.c +igb_param.c +igb_procfs.c +igb_regtest.h +igb_sysfs.c +igb_vmdq.c +igb_vmdq.h +kcompat.c +kcompat_ethtool.c +kcompat.h + diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/COPYING b/lib/librte_eal/linuxapp/kni/ethtool/igb/COPYING new file mode 100644 index 00000000..5f297e5b --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/COPYING @@ -0,0 +1,339 @@ + +"This software program is licensed subject to the GNU General Public License +(GPL). Version 2, June 1991, available at +" + +GNU General Public License + +Version 2, June 1991 + +Copyright (C) 1989, 1991 Free Software Foundation, Inc. +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + +Everyone is permitted to copy and distribute verbatim copies of this license +document, but changing it is not allowed. + +Preamble + +The licenses for most software are designed to take away your freedom to +share and change it. By contrast, the GNU General Public License is intended +to guarantee your freedom to share and change free software--to make sure +the software is free for all its users. This General Public License applies +to most of the Free Software Foundation's software and to any other program +whose authors commit to using it. (Some other Free Software Foundation +software is covered by the GNU Library General Public License instead.) You +can apply it to your programs, too. + +When we speak of free software, we are referring to freedom, not price. Our +General Public Licenses are designed to make sure that you have the freedom +to distribute copies of free software (and charge for this service if you +wish), that you receive source code or can get it if you want it, that you +can change the software or use pieces of it in new free programs; and that +you know you can do these things. + +To protect your rights, we need to make restrictions that forbid anyone to +deny you these rights or to ask you to surrender the rights. These +restrictions translate to certain responsibilities for you if you distribute +copies of the software, or if you modify it. + +For example, if you distribute copies of such a program, whether gratis or +for a fee, you must give the recipients all the rights that you have. You +must make sure that they, too, receive or can get the source code. And you +must show them these terms so they know their rights. + +We protect your rights with two steps: (1) copyright the software, and (2) +offer you this license which gives you legal permission to copy, distribute +and/or modify the software. + +Also, for each author's protection and ours, we want to make certain that +everyone understands that there is no warranty for this free software. If +the software is modified by someone else and passed on, we want its +recipients to know that what they have is not the original, so that any +problems introduced by others will not reflect on the original authors' +reputations. + +Finally, any free program is threatened constantly by software patents. We +wish to avoid the danger that redistributors of a free program will +individually obtain patent licenses, in effect making the program +proprietary. To prevent this, we have made it clear that any patent must be +licensed for everyone's free use or not licensed at all. + +The precise terms and conditions for copying, distribution and modification +follow. + +TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + +0. This License applies to any program or other work which contains a notice + placed by the copyright holder saying it may be distributed under the + terms of this General Public License. The "Program", below, refers to any + such program or work, and a "work based on the Program" means either the + Program or any derivative work under copyright law: that is to say, a + work containing the Program or a portion of it, either verbatim or with + modifications and/or translated into another language. (Hereinafter, + translation is included without limitation in the term "modification".) + Each licensee is addressed as "you". + + Activities other than copying, distribution and modification are not + covered by this License; they are outside its scope. The act of running + the Program is not restricted, and the output from the Program is covered + only if its contents constitute a work based on the Program (independent + of having been made by running the Program). Whether that is true depends + on what the Program does. + +1. You may copy and distribute verbatim copies of the Program's source code + as you receive it, in any medium, provided that you conspicuously and + appropriately publish on each copy an appropriate copyright notice and + disclaimer of warranty; keep intact all the notices that refer to this + License and to the absence of any warranty; and give any other recipients + of the Program a copy of this License along with the Program. + + You may charge a fee for the physical act of transferring a copy, and you + may at your option offer warranty protection in exchange for a fee. + +2. You may modify your copy or copies of the Program or any portion of it, + thus forming a work based on the Program, and copy and distribute such + modifications or work under the terms of Section 1 above, provided that + you also meet all of these conditions: + + * a) You must cause the modified files to carry prominent notices stating + that you changed the files and the date of any change. + + * b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any part + thereof, to be licensed as a whole at no charge to all third parties + under the terms of this License. + + * c) If the modified program normally reads commands interactively when + run, you must cause it, when started running for such interactive + use in the most ordinary way, to print or display an announcement + including an appropriate copyright notice and a notice that there is + no warranty (or else, saying that you provide a warranty) and that + users may redistribute the program under these conditions, and + telling the user how to view a copy of this License. (Exception: if + the Program itself is interactive but does not normally print such + an announcement, your work based on the Program is not required to + print an announcement.) + + These requirements apply to the modified work as a whole. If identifiable + sections of that work are not derived from the Program, and can be + reasonably considered independent and separate works in themselves, then + this License, and its terms, do not apply to those sections when you + distribute them as separate works. But when you distribute the same + sections as part of a whole which is a work based on the Program, the + distribution of the whole must be on the terms of this License, whose + permissions for other licensees extend to the entire whole, and thus to + each and every part regardless of who wrote it. + + Thus, it is not the intent of this section to claim rights or contest + your rights to work written entirely by you; rather, the intent is to + exercise the right to control the distribution of derivative or + collective works based on the Program. + + In addition, mere aggregation of another work not based on the Program + with the Program (or with a work based on the Program) on a volume of a + storage or distribution medium does not bring the other work under the + scope of this License. + +3. You may copy and distribute the Program (or a work based on it, under + Section 2) in object code or executable form under the terms of Sections + 1 and 2 above provided that you also do one of the following: + + * a) Accompany it with the complete corresponding machine-readable source + code, which must be distributed under the terms of Sections 1 and 2 + above on a medium customarily used for software interchange; or, + + * b) Accompany it with a written offer, valid for at least three years, + to give any third party, for a charge no more than your cost of + physically performing source distribution, a complete machine- + readable copy of the corresponding source code, to be distributed + under the terms of Sections 1 and 2 above on a medium customarily + used for software interchange; or, + + * c) Accompany it with the information you received as to the offer to + distribute corresponding source code. (This alternative is allowed + only for noncommercial distribution and only if you received the + program in object code or executable form with such an offer, in + accord with Subsection b above.) + + The source code for a work means the preferred form of the work for + making modifications to it. For an executable work, complete source code + means all the source code for all modules it contains, plus any + associated interface definition files, plus the scripts used to control + compilation and installation of the executable. However, as a special + exception, the source code distributed need not include anything that is + normally distributed (in either source or binary form) with the major + components (compiler, kernel, and so on) of the operating system on which + the executable runs, unless that component itself accompanies the + executable. + + If distribution of executable or object code is made by offering access + to copy from a designated place, then offering equivalent access to copy + the source code from the same place counts as distribution of the source + code, even though third parties are not compelled to copy the source + along with the object code. + +4. You may not copy, modify, sublicense, or distribute the Program except as + expressly provided under this License. Any attempt otherwise to copy, + modify, sublicense or distribute the Program is void, and will + automatically terminate your rights under this License. However, parties + who have received copies, or rights, from you under this License will not + have their licenses terminated so long as such parties remain in full + compliance. + +5. You are not required to accept this License, since you have not signed + it. However, nothing else grants you permission to modify or distribute + the Program or its derivative works. These actions are prohibited by law + if you do not accept this License. Therefore, by modifying or + distributing the Program (or any work based on the Program), you + indicate your acceptance of this License to do so, and all its terms and + conditions for copying, distributing or modifying the Program or works + based on it. + +6. Each time you redistribute the Program (or any work based on the + Program), the recipient automatically receives a license from the + original licensor to copy, distribute or modify the Program subject to + these terms and conditions. You may not impose any further restrictions + on the recipients' exercise of the rights granted herein. You are not + responsible for enforcing compliance by third parties to this License. + +7. If, as a consequence of a court judgment or allegation of patent + infringement or for any other reason (not limited to patent issues), + conditions are imposed on you (whether by court order, agreement or + otherwise) that contradict the conditions of this License, they do not + excuse you from the conditions of this License. If you cannot distribute + so as to satisfy simultaneously your obligations under this License and + any other pertinent obligations, then as a consequence you may not + distribute the Program at all. For example, if a patent license would + not permit royalty-free redistribution of the Program by all those who + receive copies directly or indirectly through you, then the only way you + could satisfy both it and this License would be to refrain entirely from + distribution of the Program. + + If any portion of this section is held invalid or unenforceable under any + particular circumstance, the balance of the section is intended to apply + and the section as a whole is intended to apply in other circumstances. + + It is not the purpose of this section to induce you to infringe any + patents or other property right claims or to contest validity of any + such claims; this section has the sole purpose of protecting the + integrity of the free software distribution system, which is implemented + by public license practices. Many people have made generous contributions + to the wide range of software distributed through that system in + reliance on consistent application of that system; it is up to the + author/donor to decide if he or she is willing to distribute software + through any other system and a licensee cannot impose that choice. + + This section is intended to make thoroughly clear what is believed to be + a consequence of the rest of this License. + +8. If the distribution and/or use of the Program is restricted in certain + countries either by patents or by copyrighted interfaces, the original + copyright holder who places the Program under this License may add an + explicit geographical distribution limitation excluding those countries, + so that distribution is permitted only in or among countries not thus + excluded. In such case, this License incorporates the limitation as if + written in the body of this License. + +9. The Free Software Foundation may publish revised and/or new versions of + the General Public License from time to time. Such new versions will be + similar in spirit to the present version, but may differ in detail to + address new problems or concerns. + + Each version is given a distinguishing version number. If the Program + specifies a version number of this License which applies to it and "any + later version", you have the option of following the terms and + conditions either of that version or of any later version published by + the Free Software Foundation. If the Program does not specify a version + number of this License, you may choose any version ever published by the + Free Software Foundation. + +10. If you wish to incorporate parts of the Program into other free programs + whose distribution conditions are different, write to the author to ask + for permission. For software which is copyrighted by the Free Software + Foundation, write to the Free Software Foundation; we sometimes make + exceptions for this. Our decision will be guided by the two goals of + preserving the free status of all derivatives of our free software and + of promoting the sharing and reuse of software generally. + + NO WARRANTY + +11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY + FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN + OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES + PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER + EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE + ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH + YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL + NECESSARY SERVICING, REPAIR OR CORRECTION. + +12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING + WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR + REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR + DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL + DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM + (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED + INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF + THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR + OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +END OF TERMS AND CONDITIONS + +How to Apply These Terms to Your New Programs + +If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it free +software which everyone can redistribute and change under these terms. + +To do so, attach the following notices to the program. It is safest to +attach them to the start of each source file to most effectively convey the +exclusion of warranty; and each file should have at least the "copyright" +line and a pointer to where the full notice is found. + +one line to give the program's name and an idea of what it does. +Copyright (C) yyyy name of author + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2 of the License, or (at your option) +any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 +Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this when +it starts in an interactive mode: + +Gnomovision version 69, Copyright (C) year name of author Gnomovision comes +with ABSOLUTELY NO WARRANTY; for details type 'show w'. This is free +software, and you are welcome to redistribute it under certain conditions; +type 'show c' for details. + +The hypothetical commands 'show w' and 'show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may be +called something other than 'show w' and 'show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + +Yoyodyne, Inc., hereby disclaims all copyright interest in the program +'Gnomovision' (which makes passes at compilers) written by James Hacker. + +signature of Ty Coon, 1 April 1989 +Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General Public +License instead of this License. diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c new file mode 100644 index 00000000..b8c9a13f --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.c @@ -0,0 +1,3665 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +/* + * 82575EB Gigabit Network Connection + * 82575EB Gigabit Backplane Connection + * 82575GB Gigabit Network Connection + * 82576 Gigabit Network Connection + * 82576 Quad Port Gigabit Mezzanine Adapter + * 82580 Gigabit Network Connection + * I350 Gigabit Network Connection + */ + +#include "e1000_api.h" +#include "e1000_i210.h" + +static s32 e1000_init_phy_params_82575(struct e1000_hw *hw); +static s32 e1000_init_mac_params_82575(struct e1000_hw *hw); +static s32 e1000_acquire_phy_82575(struct e1000_hw *hw); +static void e1000_release_phy_82575(struct e1000_hw *hw); +static s32 e1000_acquire_nvm_82575(struct e1000_hw *hw); +static void e1000_release_nvm_82575(struct e1000_hw *hw); +static s32 e1000_check_for_link_82575(struct e1000_hw *hw); +static s32 e1000_check_for_link_media_swap(struct e1000_hw *hw); +static s32 e1000_get_cfg_done_82575(struct e1000_hw *hw); +static s32 e1000_get_link_up_info_82575(struct e1000_hw *hw, u16 *speed, + u16 *duplex); +static s32 e1000_init_hw_82575(struct e1000_hw *hw); +static s32 e1000_phy_hw_reset_sgmii_82575(struct e1000_hw *hw); +static s32 e1000_read_phy_reg_sgmii_82575(struct e1000_hw *hw, u32 offset, + u16 *data); +static s32 e1000_reset_hw_82575(struct e1000_hw *hw); +static s32 e1000_reset_hw_82580(struct e1000_hw *hw); +static s32 e1000_read_phy_reg_82580(struct e1000_hw *hw, + u32 offset, u16 *data); +static s32 e1000_write_phy_reg_82580(struct e1000_hw *hw, + u32 offset, u16 data); +static s32 e1000_set_d0_lplu_state_82580(struct e1000_hw *hw, + bool active); +static s32 e1000_set_d3_lplu_state_82580(struct e1000_hw *hw, + bool active); +static s32 e1000_set_d0_lplu_state_82575(struct e1000_hw *hw, + bool active); +static s32 e1000_setup_copper_link_82575(struct e1000_hw *hw); +static s32 e1000_setup_serdes_link_82575(struct e1000_hw *hw); +static s32 e1000_get_media_type_82575(struct e1000_hw *hw); +static s32 e1000_set_sfp_media_type_82575(struct e1000_hw *hw); +static s32 e1000_valid_led_default_82575(struct e1000_hw *hw, u16 *data); +static s32 e1000_write_phy_reg_sgmii_82575(struct e1000_hw *hw, + u32 offset, u16 data); +static void e1000_clear_hw_cntrs_82575(struct e1000_hw *hw); +static s32 e1000_acquire_swfw_sync_82575(struct e1000_hw *hw, u16 mask); +static s32 e1000_get_pcs_speed_and_duplex_82575(struct e1000_hw *hw, + u16 *speed, u16 *duplex); +static s32 e1000_get_phy_id_82575(struct e1000_hw *hw); +static void e1000_release_swfw_sync_82575(struct e1000_hw *hw, u16 mask); +static bool e1000_sgmii_active_82575(struct e1000_hw *hw); +static s32 e1000_reset_init_script_82575(struct e1000_hw *hw); +static s32 e1000_read_mac_addr_82575(struct e1000_hw *hw); +static void e1000_config_collision_dist_82575(struct e1000_hw *hw); +static void e1000_power_down_phy_copper_82575(struct e1000_hw *hw); +static void e1000_shutdown_serdes_link_82575(struct e1000_hw *hw); +static void e1000_power_up_serdes_link_82575(struct e1000_hw *hw); +static s32 e1000_set_pcie_completion_timeout(struct e1000_hw *hw); +static s32 e1000_reset_mdicnfg_82580(struct e1000_hw *hw); +static s32 e1000_validate_nvm_checksum_82580(struct e1000_hw *hw); +static s32 e1000_update_nvm_checksum_82580(struct e1000_hw *hw); +static s32 e1000_update_nvm_checksum_with_offset(struct e1000_hw *hw, + u16 offset); +static s32 e1000_validate_nvm_checksum_with_offset(struct e1000_hw *hw, + u16 offset); +static s32 e1000_validate_nvm_checksum_i350(struct e1000_hw *hw); +static s32 e1000_update_nvm_checksum_i350(struct e1000_hw *hw); +static void e1000_write_vfta_i350(struct e1000_hw *hw, u32 offset, u32 value); +static void e1000_clear_vfta_i350(struct e1000_hw *hw); + +static void e1000_i2c_start(struct e1000_hw *hw); +static void e1000_i2c_stop(struct e1000_hw *hw); +static s32 e1000_clock_in_i2c_byte(struct e1000_hw *hw, u8 *data); +static s32 e1000_clock_out_i2c_byte(struct e1000_hw *hw, u8 data); +static s32 e1000_get_i2c_ack(struct e1000_hw *hw); +static s32 e1000_clock_in_i2c_bit(struct e1000_hw *hw, bool *data); +static s32 e1000_clock_out_i2c_bit(struct e1000_hw *hw, bool data); +static void e1000_raise_i2c_clk(struct e1000_hw *hw, u32 *i2cctl); +static void e1000_lower_i2c_clk(struct e1000_hw *hw, u32 *i2cctl); +static s32 e1000_set_i2c_data(struct e1000_hw *hw, u32 *i2cctl, bool data); +static bool e1000_get_i2c_data(u32 *i2cctl); + +static const u16 e1000_82580_rxpbs_table[] = { + 36, 72, 144, 1, 2, 4, 8, 16, 35, 70, 140 }; +#define E1000_82580_RXPBS_TABLE_SIZE \ + (sizeof(e1000_82580_rxpbs_table)/sizeof(u16)) + + +/** + * e1000_sgmii_uses_mdio_82575 - Determine if I2C pins are for external MDIO + * @hw: pointer to the HW structure + * + * Called to determine if the I2C pins are being used for I2C or as an + * external MDIO interface since the two options are mutually exclusive. + **/ +static bool e1000_sgmii_uses_mdio_82575(struct e1000_hw *hw) +{ + u32 reg = 0; + bool ext_mdio = false; + + DEBUGFUNC("e1000_sgmii_uses_mdio_82575"); + + switch (hw->mac.type) { + case e1000_82575: + case e1000_82576: + reg = E1000_READ_REG(hw, E1000_MDIC); + ext_mdio = !!(reg & E1000_MDIC_DEST); + break; + case e1000_82580: + case e1000_i350: + case e1000_i354: + case e1000_i210: + case e1000_i211: + reg = E1000_READ_REG(hw, E1000_MDICNFG); + ext_mdio = !!(reg & E1000_MDICNFG_EXT_MDIO); + break; + default: + break; + } + return ext_mdio; +} + +/** + * e1000_init_phy_params_82575 - Init PHY func ptrs. + * @hw: pointer to the HW structure + **/ +static s32 e1000_init_phy_params_82575(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val = E1000_SUCCESS; + u32 ctrl_ext; + + DEBUGFUNC("e1000_init_phy_params_82575"); + + phy->ops.read_i2c_byte = e1000_read_i2c_byte_generic; + phy->ops.write_i2c_byte = e1000_write_i2c_byte_generic; + + if (hw->phy.media_type != e1000_media_type_copper) { + phy->type = e1000_phy_none; + goto out; + } + + phy->ops.power_up = e1000_power_up_phy_copper; + phy->ops.power_down = e1000_power_down_phy_copper_82575; + + phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT; + phy->reset_delay_us = 100; + + phy->ops.acquire = e1000_acquire_phy_82575; + phy->ops.check_reset_block = e1000_check_reset_block_generic; + phy->ops.commit = e1000_phy_sw_reset_generic; + phy->ops.get_cfg_done = e1000_get_cfg_done_82575; + phy->ops.release = e1000_release_phy_82575; + + ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); + + if (e1000_sgmii_active_82575(hw)) { + phy->ops.reset = e1000_phy_hw_reset_sgmii_82575; + ctrl_ext |= E1000_CTRL_I2C_ENA; + } else { + phy->ops.reset = e1000_phy_hw_reset_generic; + ctrl_ext &= ~E1000_CTRL_I2C_ENA; + } + + E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); + e1000_reset_mdicnfg_82580(hw); + + if (e1000_sgmii_active_82575(hw) && !e1000_sgmii_uses_mdio_82575(hw)) { + phy->ops.read_reg = e1000_read_phy_reg_sgmii_82575; + phy->ops.write_reg = e1000_write_phy_reg_sgmii_82575; + } else { + switch (hw->mac.type) { + case e1000_82580: + case e1000_i350: + case e1000_i354: + phy->ops.read_reg = e1000_read_phy_reg_82580; + phy->ops.write_reg = e1000_write_phy_reg_82580; + break; + case e1000_i210: + case e1000_i211: + phy->ops.read_reg = e1000_read_phy_reg_gs40g; + phy->ops.write_reg = e1000_write_phy_reg_gs40g; + break; + default: + phy->ops.read_reg = e1000_read_phy_reg_igp; + phy->ops.write_reg = e1000_write_phy_reg_igp; + } + } + + /* Set phy->phy_addr and phy->id. */ + ret_val = e1000_get_phy_id_82575(hw); + + /* Verify phy id and set remaining function pointers */ + switch (phy->id) { + case M88E1543_E_PHY_ID: + case I347AT4_E_PHY_ID: + case M88E1112_E_PHY_ID: + case M88E1340M_E_PHY_ID: + case M88E1111_I_PHY_ID: + phy->type = e1000_phy_m88; + phy->ops.check_polarity = e1000_check_polarity_m88; + phy->ops.get_info = e1000_get_phy_info_m88; + if (phy->id == I347AT4_E_PHY_ID || + phy->id == M88E1112_E_PHY_ID || + phy->id == M88E1340M_E_PHY_ID) + phy->ops.get_cable_length = + e1000_get_cable_length_m88_gen2; + else if (phy->id == M88E1543_E_PHY_ID) + phy->ops.get_cable_length = + e1000_get_cable_length_m88_gen2; + else + phy->ops.get_cable_length = e1000_get_cable_length_m88; + phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_m88; + /* Check if this PHY is confgured for media swap. */ + if (phy->id == M88E1112_E_PHY_ID) { + u16 data; + + ret_val = phy->ops.write_reg(hw, + E1000_M88E1112_PAGE_ADDR, + 2); + if (ret_val) + goto out; + + ret_val = phy->ops.read_reg(hw, + E1000_M88E1112_MAC_CTRL_1, + &data); + if (ret_val) + goto out; + + data = (data & E1000_M88E1112_MAC_CTRL_1_MODE_MASK) >> + E1000_M88E1112_MAC_CTRL_1_MODE_SHIFT; + if (data == E1000_M88E1112_AUTO_COPPER_SGMII || + data == E1000_M88E1112_AUTO_COPPER_BASEX) + hw->mac.ops.check_for_link = + e1000_check_for_link_media_swap; + } + break; + case IGP03E1000_E_PHY_ID: + case IGP04E1000_E_PHY_ID: + phy->type = e1000_phy_igp_3; + phy->ops.check_polarity = e1000_check_polarity_igp; + phy->ops.get_info = e1000_get_phy_info_igp; + phy->ops.get_cable_length = e1000_get_cable_length_igp_2; + phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_igp; + phy->ops.set_d0_lplu_state = e1000_set_d0_lplu_state_82575; + phy->ops.set_d3_lplu_state = e1000_set_d3_lplu_state_generic; + break; + case I82580_I_PHY_ID: + case I350_I_PHY_ID: + phy->type = e1000_phy_82580; + phy->ops.check_polarity = e1000_check_polarity_82577; + phy->ops.force_speed_duplex = + e1000_phy_force_speed_duplex_82577; + phy->ops.get_cable_length = e1000_get_cable_length_82577; + phy->ops.get_info = e1000_get_phy_info_82577; + phy->ops.set_d0_lplu_state = e1000_set_d0_lplu_state_82580; + phy->ops.set_d3_lplu_state = e1000_set_d3_lplu_state_82580; + break; + case I210_I_PHY_ID: + phy->type = e1000_phy_i210; + phy->ops.check_polarity = e1000_check_polarity_m88; + phy->ops.get_info = e1000_get_phy_info_m88; + phy->ops.get_cable_length = e1000_get_cable_length_m88_gen2; + phy->ops.set_d0_lplu_state = e1000_set_d0_lplu_state_82580; + phy->ops.set_d3_lplu_state = e1000_set_d3_lplu_state_82580; + phy->ops.force_speed_duplex = e1000_phy_force_speed_duplex_m88; + break; + default: + ret_val = -E1000_ERR_PHY; + goto out; + } + +out: + return ret_val; +} + +/** + * e1000_init_nvm_params_82575 - Init NVM func ptrs. + * @hw: pointer to the HW structure + **/ +s32 e1000_init_nvm_params_82575(struct e1000_hw *hw) +{ + struct e1000_nvm_info *nvm = &hw->nvm; + u32 eecd = E1000_READ_REG(hw, E1000_EECD); + u16 size; + + DEBUGFUNC("e1000_init_nvm_params_82575"); + + size = (u16)((eecd & E1000_EECD_SIZE_EX_MASK) >> + E1000_EECD_SIZE_EX_SHIFT); + /* + * Added to a constant, "size" becomes the left-shift value + * for setting word_size. + */ + size += NVM_WORD_SIZE_BASE_SHIFT; + + /* Just in case size is out of range, cap it to the largest + * EEPROM size supported + */ + if (size > 15) + size = 15; + + nvm->word_size = 1 << size; + if (hw->mac.type < e1000_i210) { + nvm->opcode_bits = 8; + nvm->delay_usec = 1; + + switch (nvm->override) { + case e1000_nvm_override_spi_large: + nvm->page_size = 32; + nvm->address_bits = 16; + break; + case e1000_nvm_override_spi_small: + nvm->page_size = 8; + nvm->address_bits = 8; + break; + default: + nvm->page_size = eecd & E1000_EECD_ADDR_BITS ? 32 : 8; + nvm->address_bits = eecd & E1000_EECD_ADDR_BITS ? + 16 : 8; + break; + } + if (nvm->word_size == (1 << 15)) + nvm->page_size = 128; + + nvm->type = e1000_nvm_eeprom_spi; + } else { + nvm->type = e1000_nvm_flash_hw; + } + + /* Function Pointers */ + nvm->ops.acquire = e1000_acquire_nvm_82575; + nvm->ops.release = e1000_release_nvm_82575; + if (nvm->word_size < (1 << 15)) + nvm->ops.read = e1000_read_nvm_eerd; + else + nvm->ops.read = e1000_read_nvm_spi; + + nvm->ops.write = e1000_write_nvm_spi; + nvm->ops.validate = e1000_validate_nvm_checksum_generic; + nvm->ops.update = e1000_update_nvm_checksum_generic; + nvm->ops.valid_led_default = e1000_valid_led_default_82575; + + /* override generic family function pointers for specific descendants */ + switch (hw->mac.type) { + case e1000_82580: + nvm->ops.validate = e1000_validate_nvm_checksum_82580; + nvm->ops.update = e1000_update_nvm_checksum_82580; + break; + case e1000_i350: + //case e1000_i354: + nvm->ops.validate = e1000_validate_nvm_checksum_i350; + nvm->ops.update = e1000_update_nvm_checksum_i350; + break; + default: + break; + } + + return E1000_SUCCESS; +} + +/** + * e1000_init_mac_params_82575 - Init MAC func ptrs. + * @hw: pointer to the HW structure + **/ +static s32 e1000_init_mac_params_82575(struct e1000_hw *hw) +{ + struct e1000_mac_info *mac = &hw->mac; + struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575; + + DEBUGFUNC("e1000_init_mac_params_82575"); + + /* Derives media type */ + e1000_get_media_type_82575(hw); + /* Set mta register count */ + mac->mta_reg_count = 128; + /* Set uta register count */ + mac->uta_reg_count = (hw->mac.type == e1000_82575) ? 0 : 128; + /* Set rar entry count */ + mac->rar_entry_count = E1000_RAR_ENTRIES_82575; + if (mac->type == e1000_82576) + mac->rar_entry_count = E1000_RAR_ENTRIES_82576; + if (mac->type == e1000_82580) + mac->rar_entry_count = E1000_RAR_ENTRIES_82580; + if (mac->type == e1000_i350 || mac->type == e1000_i354) + mac->rar_entry_count = E1000_RAR_ENTRIES_I350; + + /* Enable EEE default settings for EEE supported devices */ + if (mac->type >= e1000_i350) + dev_spec->eee_disable = false; + + /* Allow a single clear of the SW semaphore on I210 and newer */ + if (mac->type >= e1000_i210) + dev_spec->clear_semaphore_once = true; + + /* Set if part includes ASF firmware */ + mac->asf_firmware_present = true; + /* FWSM register */ + mac->has_fwsm = true; + /* ARC supported; valid only if manageability features are enabled. */ + mac->arc_subsystem_valid = + !!(E1000_READ_REG(hw, E1000_FWSM) & E1000_FWSM_MODE_MASK); + + /* Function pointers */ + + /* bus type/speed/width */ + mac->ops.get_bus_info = e1000_get_bus_info_pcie_generic; + /* reset */ + if (mac->type >= e1000_82580) + mac->ops.reset_hw = e1000_reset_hw_82580; + else + mac->ops.reset_hw = e1000_reset_hw_82575; + /* hw initialization */ + mac->ops.init_hw = e1000_init_hw_82575; + /* link setup */ + mac->ops.setup_link = e1000_setup_link_generic; + /* physical interface link setup */ + mac->ops.setup_physical_interface = + (hw->phy.media_type == e1000_media_type_copper) + ? e1000_setup_copper_link_82575 : e1000_setup_serdes_link_82575; + /* physical interface shutdown */ + mac->ops.shutdown_serdes = e1000_shutdown_serdes_link_82575; + /* physical interface power up */ + mac->ops.power_up_serdes = e1000_power_up_serdes_link_82575; + /* check for link */ + mac->ops.check_for_link = e1000_check_for_link_82575; + /* read mac address */ + mac->ops.read_mac_addr = e1000_read_mac_addr_82575; + /* configure collision distance */ + mac->ops.config_collision_dist = e1000_config_collision_dist_82575; + /* multicast address update */ + mac->ops.update_mc_addr_list = e1000_update_mc_addr_list_generic; + if (hw->mac.type == e1000_i350 || mac->type == e1000_i354) { + /* writing VFTA */ + mac->ops.write_vfta = e1000_write_vfta_i350; + /* clearing VFTA */ + mac->ops.clear_vfta = e1000_clear_vfta_i350; + } else { + /* writing VFTA */ + mac->ops.write_vfta = e1000_write_vfta_generic; + /* clearing VFTA */ + mac->ops.clear_vfta = e1000_clear_vfta_generic; + } + if (hw->mac.type >= e1000_82580) + mac->ops.validate_mdi_setting = + e1000_validate_mdi_setting_crossover_generic; + /* ID LED init */ + mac->ops.id_led_init = e1000_id_led_init_generic; + /* blink LED */ + mac->ops.blink_led = e1000_blink_led_generic; + /* setup LED */ + mac->ops.setup_led = e1000_setup_led_generic; + /* cleanup LED */ + mac->ops.cleanup_led = e1000_cleanup_led_generic; + /* turn on/off LED */ + mac->ops.led_on = e1000_led_on_generic; + mac->ops.led_off = e1000_led_off_generic; + /* clear hardware counters */ + mac->ops.clear_hw_cntrs = e1000_clear_hw_cntrs_82575; + /* link info */ + mac->ops.get_link_up_info = e1000_get_link_up_info_82575; + /* get thermal sensor data */ + mac->ops.get_thermal_sensor_data = + e1000_get_thermal_sensor_data_generic; + mac->ops.init_thermal_sensor_thresh = + e1000_init_thermal_sensor_thresh_generic; + /* acquire SW_FW sync */ + mac->ops.acquire_swfw_sync = e1000_acquire_swfw_sync_82575; + mac->ops.release_swfw_sync = e1000_release_swfw_sync_82575; + if (mac->type >= e1000_i210) { + mac->ops.acquire_swfw_sync = e1000_acquire_swfw_sync_i210; + mac->ops.release_swfw_sync = e1000_release_swfw_sync_i210; + } + + /* set lan id for port to determine which phy lock to use */ + hw->mac.ops.set_lan_id(hw); + + return E1000_SUCCESS; +} + +/** + * e1000_init_function_pointers_82575 - Init func ptrs. + * @hw: pointer to the HW structure + * + * Called to initialize all function pointers and parameters. + **/ +void e1000_init_function_pointers_82575(struct e1000_hw *hw) +{ + DEBUGFUNC("e1000_init_function_pointers_82575"); + + hw->mac.ops.init_params = e1000_init_mac_params_82575; + hw->nvm.ops.init_params = e1000_init_nvm_params_82575; + hw->phy.ops.init_params = e1000_init_phy_params_82575; + hw->mbx.ops.init_params = e1000_init_mbx_params_pf; +} + +/** + * e1000_acquire_phy_82575 - Acquire rights to access PHY + * @hw: pointer to the HW structure + * + * Acquire access rights to the correct PHY. + **/ +static s32 e1000_acquire_phy_82575(struct e1000_hw *hw) +{ + u16 mask = E1000_SWFW_PHY0_SM; + + DEBUGFUNC("e1000_acquire_phy_82575"); + + if (hw->bus.func == E1000_FUNC_1) + mask = E1000_SWFW_PHY1_SM; + else if (hw->bus.func == E1000_FUNC_2) + mask = E1000_SWFW_PHY2_SM; + else if (hw->bus.func == E1000_FUNC_3) + mask = E1000_SWFW_PHY3_SM; + + return hw->mac.ops.acquire_swfw_sync(hw, mask); +} + +/** + * e1000_release_phy_82575 - Release rights to access PHY + * @hw: pointer to the HW structure + * + * A wrapper to release access rights to the correct PHY. + **/ +static void e1000_release_phy_82575(struct e1000_hw *hw) +{ + u16 mask = E1000_SWFW_PHY0_SM; + + DEBUGFUNC("e1000_release_phy_82575"); + + if (hw->bus.func == E1000_FUNC_1) + mask = E1000_SWFW_PHY1_SM; + else if (hw->bus.func == E1000_FUNC_2) + mask = E1000_SWFW_PHY2_SM; + else if (hw->bus.func == E1000_FUNC_3) + mask = E1000_SWFW_PHY3_SM; + + hw->mac.ops.release_swfw_sync(hw, mask); +} + +/** + * e1000_read_phy_reg_sgmii_82575 - Read PHY register using sgmii + * @hw: pointer to the HW structure + * @offset: register offset to be read + * @data: pointer to the read data + * + * Reads the PHY register at offset using the serial gigabit media independent + * interface and stores the retrieved information in data. + **/ +static s32 e1000_read_phy_reg_sgmii_82575(struct e1000_hw *hw, u32 offset, + u16 *data) +{ + s32 ret_val = -E1000_ERR_PARAM; + + DEBUGFUNC("e1000_read_phy_reg_sgmii_82575"); + + if (offset > E1000_MAX_SGMII_PHY_REG_ADDR) { + DEBUGOUT1("PHY Address %u is out of range\n", offset); + goto out; + } + + ret_val = hw->phy.ops.acquire(hw); + if (ret_val) + goto out; + + ret_val = e1000_read_phy_reg_i2c(hw, offset, data); + + hw->phy.ops.release(hw); + +out: + return ret_val; +} + +/** + * e1000_write_phy_reg_sgmii_82575 - Write PHY register using sgmii + * @hw: pointer to the HW structure + * @offset: register offset to write to + * @data: data to write at register offset + * + * Writes the data to PHY register at the offset using the serial gigabit + * media independent interface. + **/ +static s32 e1000_write_phy_reg_sgmii_82575(struct e1000_hw *hw, u32 offset, + u16 data) +{ + s32 ret_val = -E1000_ERR_PARAM; + + DEBUGFUNC("e1000_write_phy_reg_sgmii_82575"); + + if (offset > E1000_MAX_SGMII_PHY_REG_ADDR) { + DEBUGOUT1("PHY Address %d is out of range\n", offset); + goto out; + } + + ret_val = hw->phy.ops.acquire(hw); + if (ret_val) + goto out; + + ret_val = e1000_write_phy_reg_i2c(hw, offset, data); + + hw->phy.ops.release(hw); + +out: + return ret_val; +} + +/** + * e1000_get_phy_id_82575 - Retrieve PHY addr and id + * @hw: pointer to the HW structure + * + * Retrieves the PHY address and ID for both PHY's which do and do not use + * sgmi interface. + **/ +static s32 e1000_get_phy_id_82575(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val = E1000_SUCCESS; + u16 phy_id; + u32 ctrl_ext; + u32 mdic; + + DEBUGFUNC("e1000_get_phy_id_82575"); + + /* i354 devices can have a PHY that needs an extra read for id */ + if (hw->mac.type == e1000_i354) + e1000_get_phy_id(hw); + + + /* + * For SGMII PHYs, we try the list of possible addresses until + * we find one that works. For non-SGMII PHYs + * (e.g. integrated copper PHYs), an address of 1 should + * work. The result of this function should mean phy->phy_addr + * and phy->id are set correctly. + */ + if (!e1000_sgmii_active_82575(hw)) { + phy->addr = 1; + ret_val = e1000_get_phy_id(hw); + goto out; + } + + if (e1000_sgmii_uses_mdio_82575(hw)) { + switch (hw->mac.type) { + case e1000_82575: + case e1000_82576: + mdic = E1000_READ_REG(hw, E1000_MDIC); + mdic &= E1000_MDIC_PHY_MASK; + phy->addr = mdic >> E1000_MDIC_PHY_SHIFT; + break; + case e1000_82580: + case e1000_i350: + case e1000_i354: + case e1000_i210: + case e1000_i211: + mdic = E1000_READ_REG(hw, E1000_MDICNFG); + mdic &= E1000_MDICNFG_PHY_MASK; + phy->addr = mdic >> E1000_MDICNFG_PHY_SHIFT; + break; + default: + ret_val = -E1000_ERR_PHY; + goto out; + break; + } + ret_val = e1000_get_phy_id(hw); + goto out; + } + + /* Power on sgmii phy if it is disabled */ + ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); + E1000_WRITE_REG(hw, E1000_CTRL_EXT, + ctrl_ext & ~E1000_CTRL_EXT_SDP3_DATA); + E1000_WRITE_FLUSH(hw); + msec_delay(300); + + /* + * The address field in the I2CCMD register is 3 bits and 0 is invalid. + * Therefore, we need to test 1-7 + */ + for (phy->addr = 1; phy->addr < 8; phy->addr++) { + ret_val = e1000_read_phy_reg_sgmii_82575(hw, PHY_ID1, &phy_id); + if (ret_val == E1000_SUCCESS) { + DEBUGOUT2("Vendor ID 0x%08X read at address %u\n", + phy_id, phy->addr); + /* + * At the time of this writing, The M88 part is + * the only supported SGMII PHY product. + */ + if (phy_id == M88_VENDOR) + break; + } else { + DEBUGOUT1("PHY address %u was unreadable\n", + phy->addr); + } + } + + /* A valid PHY type couldn't be found. */ + if (phy->addr == 8) { + phy->addr = 0; + ret_val = -E1000_ERR_PHY; + } else { + ret_val = e1000_get_phy_id(hw); + } + + /* restore previous sfp cage power state */ + E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); + +out: + return ret_val; +} + +/** + * e1000_phy_hw_reset_sgmii_82575 - Performs a PHY reset + * @hw: pointer to the HW structure + * + * Resets the PHY using the serial gigabit media independent interface. + **/ +static s32 e1000_phy_hw_reset_sgmii_82575(struct e1000_hw *hw) +{ + s32 ret_val = E1000_SUCCESS; + + DEBUGFUNC("e1000_phy_hw_reset_sgmii_82575"); + + /* + * This isn't a true "hard" reset, but is the only reset + * available to us at this time. + */ + + DEBUGOUT("Soft resetting SGMII attached PHY...\n"); + + if (!(hw->phy.ops.write_reg)) + goto out; + + /* + * SFP documentation requires the following to configure the SPF module + * to work on SGMII. No further documentation is given. + */ + ret_val = hw->phy.ops.write_reg(hw, 0x1B, 0x8084); + if (ret_val) + goto out; + + ret_val = hw->phy.ops.commit(hw); + +out: + return ret_val; +} + +/** + * e1000_set_d0_lplu_state_82575 - Set Low Power Linkup D0 state + * @hw: pointer to the HW structure + * @active: true to enable LPLU, false to disable + * + * Sets the LPLU D0 state according to the active flag. When + * activating LPLU this function also disables smart speed + * and vice versa. LPLU will not be activated unless the + * device autonegotiation advertisement meets standards of + * either 10 or 10/100 or 10/100/1000 at all duplexes. + * This is a function pointer entry point only called by + * PHY setup routines. + **/ +static s32 e1000_set_d0_lplu_state_82575(struct e1000_hw *hw, bool active) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val = E1000_SUCCESS; + u16 data; + + DEBUGFUNC("e1000_set_d0_lplu_state_82575"); + + if (!(hw->phy.ops.read_reg)) + goto out; + + ret_val = phy->ops.read_reg(hw, IGP02E1000_PHY_POWER_MGMT, &data); + if (ret_val) + goto out; + + if (active) { + data |= IGP02E1000_PM_D0_LPLU; + ret_val = phy->ops.write_reg(hw, IGP02E1000_PHY_POWER_MGMT, + data); + if (ret_val) + goto out; + + /* When LPLU is enabled, we should disable SmartSpeed */ + ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG, + &data); + data &= ~IGP01E1000_PSCFR_SMART_SPEED; + ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG, + data); + if (ret_val) + goto out; + } else { + data &= ~IGP02E1000_PM_D0_LPLU; + ret_val = phy->ops.write_reg(hw, IGP02E1000_PHY_POWER_MGMT, + data); + /* + * LPLU and SmartSpeed are mutually exclusive. LPLU is used + * during Dx states where the power conservation is most + * important. During driver activity we should enable + * SmartSpeed, so performance is maintained. + */ + if (phy->smart_speed == e1000_smart_speed_on) { + ret_val = phy->ops.read_reg(hw, + IGP01E1000_PHY_PORT_CONFIG, + &data); + if (ret_val) + goto out; + + data |= IGP01E1000_PSCFR_SMART_SPEED; + ret_val = phy->ops.write_reg(hw, + IGP01E1000_PHY_PORT_CONFIG, + data); + if (ret_val) + goto out; + } else if (phy->smart_speed == e1000_smart_speed_off) { + ret_val = phy->ops.read_reg(hw, + IGP01E1000_PHY_PORT_CONFIG, + &data); + if (ret_val) + goto out; + + data &= ~IGP01E1000_PSCFR_SMART_SPEED; + ret_val = phy->ops.write_reg(hw, + IGP01E1000_PHY_PORT_CONFIG, + data); + if (ret_val) + goto out; + } + } + +out: + return ret_val; +} + +/** + * e1000_set_d0_lplu_state_82580 - Set Low Power Linkup D0 state + * @hw: pointer to the HW structure + * @active: true to enable LPLU, false to disable + * + * Sets the LPLU D0 state according to the active flag. When + * activating LPLU this function also disables smart speed + * and vice versa. LPLU will not be activated unless the + * device autonegotiation advertisement meets standards of + * either 10 or 10/100 or 10/100/1000 at all duplexes. + * This is a function pointer entry point only called by + * PHY setup routines. + **/ +static s32 e1000_set_d0_lplu_state_82580(struct e1000_hw *hw, bool active) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val = E1000_SUCCESS; + u32 data; + + DEBUGFUNC("e1000_set_d0_lplu_state_82580"); + + data = E1000_READ_REG(hw, E1000_82580_PHY_POWER_MGMT); + + if (active) { + data |= E1000_82580_PM_D0_LPLU; + + /* When LPLU is enabled, we should disable SmartSpeed */ + data &= ~E1000_82580_PM_SPD; + } else { + data &= ~E1000_82580_PM_D0_LPLU; + + /* + * LPLU and SmartSpeed are mutually exclusive. LPLU is used + * during Dx states where the power conservation is most + * important. During driver activity we should enable + * SmartSpeed, so performance is maintained. + */ + if (phy->smart_speed == e1000_smart_speed_on) + data |= E1000_82580_PM_SPD; + else if (phy->smart_speed == e1000_smart_speed_off) + data &= ~E1000_82580_PM_SPD; + } + + E1000_WRITE_REG(hw, E1000_82580_PHY_POWER_MGMT, data); + return ret_val; +} + +/** + * e1000_set_d3_lplu_state_82580 - Sets low power link up state for D3 + * @hw: pointer to the HW structure + * @active: boolean used to enable/disable lplu + * + * Success returns 0, Failure returns 1 + * + * The low power link up (lplu) state is set to the power management level D3 + * and SmartSpeed is disabled when active is true, else clear lplu for D3 + * and enable Smartspeed. LPLU and Smartspeed are mutually exclusive. LPLU + * is used during Dx states where the power conservation is most important. + * During driver activity, SmartSpeed should be enabled so performance is + * maintained. + **/ +s32 e1000_set_d3_lplu_state_82580(struct e1000_hw *hw, bool active) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val = E1000_SUCCESS; + u32 data; + + DEBUGFUNC("e1000_set_d3_lplu_state_82580"); + + data = E1000_READ_REG(hw, E1000_82580_PHY_POWER_MGMT); + + if (!active) { + data &= ~E1000_82580_PM_D3_LPLU; + /* + * LPLU and SmartSpeed are mutually exclusive. LPLU is used + * during Dx states where the power conservation is most + * important. During driver activity we should enable + * SmartSpeed, so performance is maintained. + */ + if (phy->smart_speed == e1000_smart_speed_on) + data |= E1000_82580_PM_SPD; + else if (phy->smart_speed == e1000_smart_speed_off) + data &= ~E1000_82580_PM_SPD; + } else if ((phy->autoneg_advertised == E1000_ALL_SPEED_DUPLEX) || + (phy->autoneg_advertised == E1000_ALL_NOT_GIG) || + (phy->autoneg_advertised == E1000_ALL_10_SPEED)) { + data |= E1000_82580_PM_D3_LPLU; + /* When LPLU is enabled, we should disable SmartSpeed */ + data &= ~E1000_82580_PM_SPD; + } + + E1000_WRITE_REG(hw, E1000_82580_PHY_POWER_MGMT, data); + return ret_val; +} + +/** + * e1000_acquire_nvm_82575 - Request for access to EEPROM + * @hw: pointer to the HW structure + * + * Acquire the necessary semaphores for exclusive access to the EEPROM. + * Set the EEPROM access request bit and wait for EEPROM access grant bit. + * Return successful if access grant bit set, else clear the request for + * EEPROM access and return -E1000_ERR_NVM (-1). + **/ +static s32 e1000_acquire_nvm_82575(struct e1000_hw *hw) +{ + s32 ret_val; + + DEBUGFUNC("e1000_acquire_nvm_82575"); + + ret_val = e1000_acquire_swfw_sync_82575(hw, E1000_SWFW_EEP_SM); + if (ret_val) + goto out; + + /* + * Check if there is some access + * error this access may hook on + */ + if (hw->mac.type == e1000_i350) { + u32 eecd = E1000_READ_REG(hw, E1000_EECD); + if (eecd & (E1000_EECD_BLOCKED | E1000_EECD_ABORT | + E1000_EECD_TIMEOUT)) { + /* Clear all access error flags */ + E1000_WRITE_REG(hw, E1000_EECD, eecd | + E1000_EECD_ERROR_CLR); + DEBUGOUT("Nvm bit banging access error detected and cleared.\n"); + } + } + if (hw->mac.type == e1000_82580) { + u32 eecd = E1000_READ_REG(hw, E1000_EECD); + if (eecd & E1000_EECD_BLOCKED) { + /* Clear access error flag */ + E1000_WRITE_REG(hw, E1000_EECD, eecd | + E1000_EECD_BLOCKED); + DEBUGOUT("Nvm bit banging access error detected and cleared.\n"); + } + } + + + ret_val = e1000_acquire_nvm_generic(hw); + if (ret_val) + e1000_release_swfw_sync_82575(hw, E1000_SWFW_EEP_SM); + +out: + return ret_val; +} + +/** + * e1000_release_nvm_82575 - Release exclusive access to EEPROM + * @hw: pointer to the HW structure + * + * Stop any current commands to the EEPROM and clear the EEPROM request bit, + * then release the semaphores acquired. + **/ +static void e1000_release_nvm_82575(struct e1000_hw *hw) +{ + DEBUGFUNC("e1000_release_nvm_82575"); + + e1000_release_nvm_generic(hw); + + e1000_release_swfw_sync_82575(hw, E1000_SWFW_EEP_SM); +} + +/** + * e1000_acquire_swfw_sync_82575 - Acquire SW/FW semaphore + * @hw: pointer to the HW structure + * @mask: specifies which semaphore to acquire + * + * Acquire the SW/FW semaphore to access the PHY or NVM. The mask + * will also specify which port we're acquiring the lock for. + **/ +static s32 e1000_acquire_swfw_sync_82575(struct e1000_hw *hw, u16 mask) +{ + u32 swfw_sync; + u32 swmask = mask; + u32 fwmask = mask << 16; + s32 ret_val = E1000_SUCCESS; + s32 i = 0, timeout = 200; /* FIXME: find real value to use here */ + + DEBUGFUNC("e1000_acquire_swfw_sync_82575"); + + while (i < timeout) { + if (e1000_get_hw_semaphore_generic(hw)) { + ret_val = -E1000_ERR_SWFW_SYNC; + goto out; + } + + swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); + if (!(swfw_sync & (fwmask | swmask))) + break; + + /* + * Firmware currently using resource (fwmask) + * or other software thread using resource (swmask) + */ + e1000_put_hw_semaphore_generic(hw); + msec_delay_irq(5); + i++; + } + + if (i == timeout) { + DEBUGOUT("Driver can't access resource, SW_FW_SYNC timeout.\n"); + ret_val = -E1000_ERR_SWFW_SYNC; + goto out; + } + + swfw_sync |= swmask; + E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); + + e1000_put_hw_semaphore_generic(hw); + +out: + return ret_val; +} + +/** + * e1000_release_swfw_sync_82575 - Release SW/FW semaphore + * @hw: pointer to the HW structure + * @mask: specifies which semaphore to acquire + * + * Release the SW/FW semaphore used to access the PHY or NVM. The mask + * will also specify which port we're releasing the lock for. + **/ +static void e1000_release_swfw_sync_82575(struct e1000_hw *hw, u16 mask) +{ + u32 swfw_sync; + + DEBUGFUNC("e1000_release_swfw_sync_82575"); + + while (e1000_get_hw_semaphore_generic(hw) != E1000_SUCCESS) + ; /* Empty */ + + swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); + swfw_sync &= ~mask; + E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); + + e1000_put_hw_semaphore_generic(hw); +} + +/** + * e1000_get_cfg_done_82575 - Read config done bit + * @hw: pointer to the HW structure + * + * Read the management control register for the config done bit for + * completion status. NOTE: silicon which is EEPROM-less will fail trying + * to read the config done bit, so an error is *ONLY* logged and returns + * E1000_SUCCESS. If we were to return with error, EEPROM-less silicon + * would not be able to be reset or change link. + **/ +static s32 e1000_get_cfg_done_82575(struct e1000_hw *hw) +{ + s32 timeout = PHY_CFG_TIMEOUT; + s32 ret_val = E1000_SUCCESS; + u32 mask = E1000_NVM_CFG_DONE_PORT_0; + + DEBUGFUNC("e1000_get_cfg_done_82575"); + + if (hw->bus.func == E1000_FUNC_1) + mask = E1000_NVM_CFG_DONE_PORT_1; + else if (hw->bus.func == E1000_FUNC_2) + mask = E1000_NVM_CFG_DONE_PORT_2; + else if (hw->bus.func == E1000_FUNC_3) + mask = E1000_NVM_CFG_DONE_PORT_3; + while (timeout) { + if (E1000_READ_REG(hw, E1000_EEMNGCTL) & mask) + break; + msec_delay(1); + timeout--; + } + if (!timeout) + DEBUGOUT("MNG configuration cycle has not completed.\n"); + + /* If EEPROM is not marked present, init the PHY manually */ + if (!(E1000_READ_REG(hw, E1000_EECD) & E1000_EECD_PRES) && + (hw->phy.type == e1000_phy_igp_3)) + e1000_phy_init_script_igp3(hw); + + return ret_val; +} + +/** + * e1000_get_link_up_info_82575 - Get link speed/duplex info + * @hw: pointer to the HW structure + * @speed: stores the current speed + * @duplex: stores the current duplex + * + * This is a wrapper function, if using the serial gigabit media independent + * interface, use PCS to retrieve the link speed and duplex information. + * Otherwise, use the generic function to get the link speed and duplex info. + **/ +static s32 e1000_get_link_up_info_82575(struct e1000_hw *hw, u16 *speed, + u16 *duplex) +{ + s32 ret_val; + + DEBUGFUNC("e1000_get_link_up_info_82575"); + + if (hw->phy.media_type != e1000_media_type_copper) + ret_val = e1000_get_pcs_speed_and_duplex_82575(hw, speed, + duplex); + else + ret_val = e1000_get_speed_and_duplex_copper_generic(hw, speed, + duplex); + + return ret_val; +} + +/** + * e1000_check_for_link_82575 - Check for link + * @hw: pointer to the HW structure + * + * If sgmii is enabled, then use the pcs register to determine link, otherwise + * use the generic interface for determining link. + **/ +static s32 e1000_check_for_link_82575(struct e1000_hw *hw) +{ + s32 ret_val; + u16 speed, duplex; + + DEBUGFUNC("e1000_check_for_link_82575"); + + if (hw->phy.media_type != e1000_media_type_copper) { + ret_val = e1000_get_pcs_speed_and_duplex_82575(hw, &speed, + &duplex); + /* + * Use this flag to determine if link needs to be checked or + * not. If we have link clear the flag so that we do not + * continue to check for link. + */ + hw->mac.get_link_status = !hw->mac.serdes_has_link; + + /* + * Configure Flow Control now that Auto-Neg has completed. + * First, we need to restore the desired flow control + * settings because we may have had to re-autoneg with a + * different link partner. + */ + ret_val = e1000_config_fc_after_link_up_generic(hw); + if (ret_val) + DEBUGOUT("Error configuring flow control\n"); + } else { + ret_val = e1000_check_for_copper_link_generic(hw); + } + + return ret_val; +} + +/** + * e1000_check_for_link_media_swap - Check which M88E1112 interface linked + * @hw: pointer to the HW structure + * + * Poll the M88E1112 interfaces to see which interface achieved link. + */ +static s32 e1000_check_for_link_media_swap(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 data; + u8 port = 0; + + DEBUGFUNC("e1000_check_for_link_media_swap"); + + /* Check the copper medium. */ + ret_val = phy->ops.write_reg(hw, E1000_M88E1112_PAGE_ADDR, 0); + if (ret_val) + return ret_val; + + ret_val = phy->ops.read_reg(hw, E1000_M88E1112_STATUS, &data); + if (ret_val) + return ret_val; + + if (data & E1000_M88E1112_STATUS_LINK) + port = E1000_MEDIA_PORT_COPPER; + + /* Check the other medium. */ + ret_val = phy->ops.write_reg(hw, E1000_M88E1112_PAGE_ADDR, 1); + if (ret_val) + return ret_val; + + ret_val = phy->ops.read_reg(hw, E1000_M88E1112_STATUS, &data); + if (ret_val) + return ret_val; + + if (data & E1000_M88E1112_STATUS_LINK) + port = E1000_MEDIA_PORT_OTHER; + + /* Determine if a swap needs to happen. */ + if (port && (hw->dev_spec._82575.media_port != port)) { + hw->dev_spec._82575.media_port = port; + hw->dev_spec._82575.media_changed = true; + } else { + ret_val = e1000_check_for_link_82575(hw); + } + + return E1000_SUCCESS; +} + +/** + * e1000_power_up_serdes_link_82575 - Power up the serdes link after shutdown + * @hw: pointer to the HW structure + **/ +static void e1000_power_up_serdes_link_82575(struct e1000_hw *hw) +{ + u32 reg; + + DEBUGFUNC("e1000_power_up_serdes_link_82575"); + + if ((hw->phy.media_type != e1000_media_type_internal_serdes) && + !e1000_sgmii_active_82575(hw)) + return; + + /* Enable PCS to turn on link */ + reg = E1000_READ_REG(hw, E1000_PCS_CFG0); + reg |= E1000_PCS_CFG_PCS_EN; + E1000_WRITE_REG(hw, E1000_PCS_CFG0, reg); + + /* Power up the laser */ + reg = E1000_READ_REG(hw, E1000_CTRL_EXT); + reg &= ~E1000_CTRL_EXT_SDP3_DATA; + E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg); + + /* flush the write to verify completion */ + E1000_WRITE_FLUSH(hw); + msec_delay(1); +} + +/** + * e1000_get_pcs_speed_and_duplex_82575 - Retrieve current speed/duplex + * @hw: pointer to the HW structure + * @speed: stores the current speed + * @duplex: stores the current duplex + * + * Using the physical coding sub-layer (PCS), retrieve the current speed and + * duplex, then store the values in the pointers provided. + **/ +static s32 e1000_get_pcs_speed_and_duplex_82575(struct e1000_hw *hw, + u16 *speed, u16 *duplex) +{ + struct e1000_mac_info *mac = &hw->mac; + u32 pcs; + u32 status; + + DEBUGFUNC("e1000_get_pcs_speed_and_duplex_82575"); + + /* + * Read the PCS Status register for link state. For non-copper mode, + * the status register is not accurate. The PCS status register is + * used instead. + */ + pcs = E1000_READ_REG(hw, E1000_PCS_LSTAT); + + /* + * The link up bit determines when link is up on autoneg. + */ + if (pcs & E1000_PCS_LSTS_LINK_OK) { + mac->serdes_has_link = true; + + /* Detect and store PCS speed */ + if (pcs & E1000_PCS_LSTS_SPEED_1000) + *speed = SPEED_1000; + else if (pcs & E1000_PCS_LSTS_SPEED_100) + *speed = SPEED_100; + else + *speed = SPEED_10; + + /* Detect and store PCS duplex */ + if (pcs & E1000_PCS_LSTS_DUPLEX_FULL) + *duplex = FULL_DUPLEX; + else + *duplex = HALF_DUPLEX; + + /* Check if it is an I354 2.5Gb backplane connection. */ + if (mac->type == e1000_i354) { + status = E1000_READ_REG(hw, E1000_STATUS); + if ((status & E1000_STATUS_2P5_SKU) && + !(status & E1000_STATUS_2P5_SKU_OVER)) { + *speed = SPEED_2500; + *duplex = FULL_DUPLEX; + DEBUGOUT("2500 Mbs, "); + DEBUGOUT("Full Duplex\n"); + } + } + + } else { + mac->serdes_has_link = false; + *speed = 0; + *duplex = 0; + } + + return E1000_SUCCESS; +} + +/** + * e1000_shutdown_serdes_link_82575 - Remove link during power down + * @hw: pointer to the HW structure + * + * In the case of serdes shut down sfp and PCS on driver unload + * when management pass thru is not enabled. + **/ +void e1000_shutdown_serdes_link_82575(struct e1000_hw *hw) +{ + u32 reg; + + DEBUGFUNC("e1000_shutdown_serdes_link_82575"); + + if ((hw->phy.media_type != e1000_media_type_internal_serdes) && + !e1000_sgmii_active_82575(hw)) + return; + + if (!e1000_enable_mng_pass_thru(hw)) { + /* Disable PCS to turn off link */ + reg = E1000_READ_REG(hw, E1000_PCS_CFG0); + reg &= ~E1000_PCS_CFG_PCS_EN; + E1000_WRITE_REG(hw, E1000_PCS_CFG0, reg); + + /* shutdown the laser */ + reg = E1000_READ_REG(hw, E1000_CTRL_EXT); + reg |= E1000_CTRL_EXT_SDP3_DATA; + E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg); + + /* flush the write to verify completion */ + E1000_WRITE_FLUSH(hw); + msec_delay(1); + } + + return; +} + +/** + * e1000_reset_hw_82575 - Reset hardware + * @hw: pointer to the HW structure + * + * This resets the hardware into a known state. + **/ +static s32 e1000_reset_hw_82575(struct e1000_hw *hw) +{ + u32 ctrl; + s32 ret_val; + + DEBUGFUNC("e1000_reset_hw_82575"); + + /* + * Prevent the PCI-E bus from sticking if there is no TLP connection + * on the last TLP read/write transaction when MAC is reset. + */ + ret_val = e1000_disable_pcie_master_generic(hw); + if (ret_val) + DEBUGOUT("PCI-E Master disable polling has failed.\n"); + + /* set the completion timeout for interface */ + ret_val = e1000_set_pcie_completion_timeout(hw); + if (ret_val) + DEBUGOUT("PCI-E Set completion timeout has failed.\n"); + + DEBUGOUT("Masking off all interrupts\n"); + E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); + + E1000_WRITE_REG(hw, E1000_RCTL, 0); + E1000_WRITE_REG(hw, E1000_TCTL, E1000_TCTL_PSP); + E1000_WRITE_FLUSH(hw); + + msec_delay(10); + + ctrl = E1000_READ_REG(hw, E1000_CTRL); + + DEBUGOUT("Issuing a global reset to MAC\n"); + E1000_WRITE_REG(hw, E1000_CTRL, ctrl | E1000_CTRL_RST); + + ret_val = e1000_get_auto_rd_done_generic(hw); + if (ret_val) { + /* + * When auto config read does not complete, do not + * return with an error. This can happen in situations + * where there is no eeprom and prevents getting link. + */ + DEBUGOUT("Auto Read Done did not complete\n"); + } + + /* If EEPROM is not present, run manual init scripts */ + if (!(E1000_READ_REG(hw, E1000_EECD) & E1000_EECD_PRES)) + e1000_reset_init_script_82575(hw); + + /* Clear any pending interrupt events. */ + E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); + E1000_READ_REG(hw, E1000_ICR); + + /* Install any alternate MAC address into RAR0 */ + ret_val = e1000_check_alt_mac_addr_generic(hw); + + return ret_val; +} + +/** + * e1000_init_hw_82575 - Initialize hardware + * @hw: pointer to the HW structure + * + * This inits the hardware readying it for operation. + **/ +static s32 e1000_init_hw_82575(struct e1000_hw *hw) +{ + struct e1000_mac_info *mac = &hw->mac; + s32 ret_val; + u16 i, rar_count = mac->rar_entry_count; + + DEBUGFUNC("e1000_init_hw_82575"); + + /* Initialize identification LED */ + ret_val = mac->ops.id_led_init(hw); + if (ret_val) { + DEBUGOUT("Error initializing identification LED\n"); + /* This is not fatal and we should not stop init due to this */ + } + + /* Disabling VLAN filtering */ + DEBUGOUT("Initializing the IEEE VLAN\n"); + mac->ops.clear_vfta(hw); + + /* Setup the receive address */ + e1000_init_rx_addrs_generic(hw, rar_count); + + /* Zero out the Multicast HASH table */ + DEBUGOUT("Zeroing the MTA\n"); + for (i = 0; i < mac->mta_reg_count; i++) + E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, 0); + + /* Zero out the Unicast HASH table */ + DEBUGOUT("Zeroing the UTA\n"); + for (i = 0; i < mac->uta_reg_count; i++) + E1000_WRITE_REG_ARRAY(hw, E1000_UTA, i, 0); + + /* Setup link and flow control */ + ret_val = mac->ops.setup_link(hw); + + /* Set the default MTU size */ + hw->dev_spec._82575.mtu = 1500; + + /* + * Clear all of the statistics registers (clear on read). It is + * important that we do this after we have tried to establish link + * because the symbol error count will increment wildly if there + * is no link. + */ + e1000_clear_hw_cntrs_82575(hw); + + return ret_val; +} + +/** + * e1000_setup_copper_link_82575 - Configure copper link settings + * @hw: pointer to the HW structure + * + * Configures the link for auto-neg or forced speed and duplex. Then we check + * for link, once link is established calls to configure collision distance + * and flow control are called. + **/ +static s32 e1000_setup_copper_link_82575(struct e1000_hw *hw) +{ + u32 ctrl; + s32 ret_val; + u32 phpm_reg; + + DEBUGFUNC("e1000_setup_copper_link_82575"); + + ctrl = E1000_READ_REG(hw, E1000_CTRL); + ctrl |= E1000_CTRL_SLU; + ctrl &= ~(E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); + E1000_WRITE_REG(hw, E1000_CTRL, ctrl); + + /* Clear Go Link Disconnect bit on supported devices */ + switch (hw->mac.type) { + case e1000_82580: + case e1000_i350: + case e1000_i210: + case e1000_i211: + phpm_reg = E1000_READ_REG(hw, E1000_82580_PHY_POWER_MGMT); + phpm_reg &= ~E1000_82580_PM_GO_LINKD; + E1000_WRITE_REG(hw, E1000_82580_PHY_POWER_MGMT, phpm_reg); + break; + default: + break; + } + + ret_val = e1000_setup_serdes_link_82575(hw); + if (ret_val) + goto out; + + if (e1000_sgmii_active_82575(hw) && !hw->phy.reset_disable) { + /* allow time for SFP cage time to power up phy */ + msec_delay(300); + + ret_val = hw->phy.ops.reset(hw); + if (ret_val) { + DEBUGOUT("Error resetting the PHY.\n"); + goto out; + } + } + switch (hw->phy.type) { + case e1000_phy_i210: + case e1000_phy_m88: + switch (hw->phy.id) { + case I347AT4_E_PHY_ID: + case M88E1112_E_PHY_ID: + case M88E1340M_E_PHY_ID: + case M88E1543_E_PHY_ID: + case I210_I_PHY_ID: + ret_val = e1000_copper_link_setup_m88_gen2(hw); + break; + default: + ret_val = e1000_copper_link_setup_m88(hw); + break; + } + break; + case e1000_phy_igp_3: + ret_val = e1000_copper_link_setup_igp(hw); + break; + case e1000_phy_82580: + ret_val = e1000_copper_link_setup_82577(hw); + break; + default: + ret_val = -E1000_ERR_PHY; + break; + } + + if (ret_val) + goto out; + + ret_val = e1000_setup_copper_link_generic(hw); +out: + return ret_val; +} + +/** + * e1000_setup_serdes_link_82575 - Setup link for serdes + * @hw: pointer to the HW structure + * + * Configure the physical coding sub-layer (PCS) link. The PCS link is + * used on copper connections where the serialized gigabit media independent + * interface (sgmii), or serdes fiber is being used. Configures the link + * for auto-negotiation or forces speed/duplex. + **/ +static s32 e1000_setup_serdes_link_82575(struct e1000_hw *hw) +{ + u32 ctrl_ext, ctrl_reg, reg, anadv_reg; + bool pcs_autoneg; + s32 ret_val = E1000_SUCCESS; + u16 data; + + DEBUGFUNC("e1000_setup_serdes_link_82575"); + + if ((hw->phy.media_type != e1000_media_type_internal_serdes) && + !e1000_sgmii_active_82575(hw)) + return ret_val; + + /* + * On the 82575, SerDes loopback mode persists until it is + * explicitly turned off or a power cycle is performed. A read to + * the register does not indicate its status. Therefore, we ensure + * loopback mode is disabled during initialization. + */ + E1000_WRITE_REG(hw, E1000_SCTL, E1000_SCTL_DISABLE_SERDES_LOOPBACK); + + /* power on the sfp cage if present */ + ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); + ctrl_ext &= ~E1000_CTRL_EXT_SDP3_DATA; + E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); + + ctrl_reg = E1000_READ_REG(hw, E1000_CTRL); + ctrl_reg |= E1000_CTRL_SLU; + + /* set both sw defined pins on 82575/82576*/ + if (hw->mac.type == e1000_82575 || hw->mac.type == e1000_82576) + ctrl_reg |= E1000_CTRL_SWDPIN0 | E1000_CTRL_SWDPIN1; + + reg = E1000_READ_REG(hw, E1000_PCS_LCTL); + + /* default pcs_autoneg to the same setting as mac autoneg */ + pcs_autoneg = hw->mac.autoneg; + + switch (ctrl_ext & E1000_CTRL_EXT_LINK_MODE_MASK) { + case E1000_CTRL_EXT_LINK_MODE_SGMII: + /* sgmii mode lets the phy handle forcing speed/duplex */ + pcs_autoneg = true; + /* autoneg time out should be disabled for SGMII mode */ + reg &= ~(E1000_PCS_LCTL_AN_TIMEOUT); + break; + case E1000_CTRL_EXT_LINK_MODE_1000BASE_KX: + /* disable PCS autoneg and support parallel detect only */ + pcs_autoneg = false; + /* fall through to default case */ + default: + if (hw->mac.type == e1000_82575 || + hw->mac.type == e1000_82576) { + ret_val = hw->nvm.ops.read(hw, NVM_COMPAT, 1, &data); + if (ret_val) { + DEBUGOUT("NVM Read Error\n"); + return ret_val; + } + + if (data & E1000_EEPROM_PCS_AUTONEG_DISABLE_BIT) + pcs_autoneg = false; + } + + /* + * non-SGMII modes only supports a speed of 1000/Full for the + * link so it is best to just force the MAC and let the pcs + * link either autoneg or be forced to 1000/Full + */ + ctrl_reg |= E1000_CTRL_SPD_1000 | E1000_CTRL_FRCSPD | + E1000_CTRL_FD | E1000_CTRL_FRCDPX; + + /* set speed of 1000/Full if speed/duplex is forced */ + reg |= E1000_PCS_LCTL_FSV_1000 | E1000_PCS_LCTL_FDV_FULL; + break; + } + + E1000_WRITE_REG(hw, E1000_CTRL, ctrl_reg); + + /* + * New SerDes mode allows for forcing speed or autonegotiating speed + * at 1gb. Autoneg should be default set by most drivers. This is the + * mode that will be compatible with older link partners and switches. + * However, both are supported by the hardware and some drivers/tools. + */ + reg &= ~(E1000_PCS_LCTL_AN_ENABLE | E1000_PCS_LCTL_FLV_LINK_UP | + E1000_PCS_LCTL_FSD | E1000_PCS_LCTL_FORCE_LINK); + + if (pcs_autoneg) { + /* Set PCS register for autoneg */ + reg |= E1000_PCS_LCTL_AN_ENABLE | /* Enable Autoneg */ + E1000_PCS_LCTL_AN_RESTART; /* Restart autoneg */ + + /* Disable force flow control for autoneg */ + reg &= ~E1000_PCS_LCTL_FORCE_FCTRL; + + /* Configure flow control advertisement for autoneg */ + anadv_reg = E1000_READ_REG(hw, E1000_PCS_ANADV); + anadv_reg &= ~(E1000_TXCW_ASM_DIR | E1000_TXCW_PAUSE); + + switch (hw->fc.requested_mode) { + case e1000_fc_full: + case e1000_fc_rx_pause: + anadv_reg |= E1000_TXCW_ASM_DIR; + anadv_reg |= E1000_TXCW_PAUSE; + break; + case e1000_fc_tx_pause: + anadv_reg |= E1000_TXCW_ASM_DIR; + break; + default: + break; + } + + E1000_WRITE_REG(hw, E1000_PCS_ANADV, anadv_reg); + + DEBUGOUT1("Configuring Autoneg:PCS_LCTL=0x%08X\n", reg); + } else { + /* Set PCS register for forced link */ + reg |= E1000_PCS_LCTL_FSD; /* Force Speed */ + + /* Force flow control for forced link */ + reg |= E1000_PCS_LCTL_FORCE_FCTRL; + + DEBUGOUT1("Configuring Forced Link:PCS_LCTL=0x%08X\n", reg); + } + + E1000_WRITE_REG(hw, E1000_PCS_LCTL, reg); + + if (!pcs_autoneg && !e1000_sgmii_active_82575(hw)) + e1000_force_mac_fc_generic(hw); + + return ret_val; +} + +/** + * e1000_get_media_type_82575 - derives current media type. + * @hw: pointer to the HW structure + * + * The media type is chosen reflecting few settings. + * The following are taken into account: + * - link mode set in the current port Init Control Word #3 + * - current link mode settings in CSR register + * - MDIO vs. I2C PHY control interface chosen + * - SFP module media type + **/ +static s32 e1000_get_media_type_82575(struct e1000_hw *hw) +{ + struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575; + s32 ret_val = E1000_SUCCESS; + u32 ctrl_ext = 0; + u32 link_mode = 0; + + /* Set internal phy as default */ + dev_spec->sgmii_active = false; + dev_spec->module_plugged = false; + + /* Get CSR setting */ + ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); + + /* extract link mode setting */ + link_mode = ctrl_ext & E1000_CTRL_EXT_LINK_MODE_MASK; + + switch (link_mode) { + case E1000_CTRL_EXT_LINK_MODE_1000BASE_KX: + hw->phy.media_type = e1000_media_type_internal_serdes; + break; + case E1000_CTRL_EXT_LINK_MODE_GMII: + hw->phy.media_type = e1000_media_type_copper; + break; + case E1000_CTRL_EXT_LINK_MODE_SGMII: + /* Get phy control interface type set (MDIO vs. I2C)*/ + if (e1000_sgmii_uses_mdio_82575(hw)) { + hw->phy.media_type = e1000_media_type_copper; + dev_spec->sgmii_active = true; + break; + } + /* fall through for I2C based SGMII */ + case E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES: + /* read media type from SFP EEPROM */ + ret_val = e1000_set_sfp_media_type_82575(hw); + if ((ret_val != E1000_SUCCESS) || + (hw->phy.media_type == e1000_media_type_unknown)) { + /* + * If media type was not identified then return media + * type defined by the CTRL_EXT settings. + */ + hw->phy.media_type = e1000_media_type_internal_serdes; + + if (link_mode == E1000_CTRL_EXT_LINK_MODE_SGMII) { + hw->phy.media_type = e1000_media_type_copper; + dev_spec->sgmii_active = true; + } + + break; + } + + /* do not change link mode for 100BaseFX */ + if (dev_spec->eth_flags.e100_base_fx) + break; + + /* change current link mode setting */ + ctrl_ext &= ~E1000_CTRL_EXT_LINK_MODE_MASK; + + if (hw->phy.media_type == e1000_media_type_copper) + ctrl_ext |= E1000_CTRL_EXT_LINK_MODE_SGMII; + else + ctrl_ext |= E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES; + + E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); + + break; + } + + return ret_val; +} + +/** + * e1000_set_sfp_media_type_82575 - derives SFP module media type. + * @hw: pointer to the HW structure + * + * The media type is chosen based on SFP module. + * compatibility flags retrieved from SFP ID EEPROM. + **/ +static s32 e1000_set_sfp_media_type_82575(struct e1000_hw *hw) +{ + s32 ret_val = E1000_ERR_CONFIG; + u32 ctrl_ext = 0; + struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575; + struct sfp_e1000_flags *eth_flags = &dev_spec->eth_flags; + u8 tranceiver_type = 0; + s32 timeout = 3; + + /* Turn I2C interface ON and power on sfp cage */ + ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); + ctrl_ext &= ~E1000_CTRL_EXT_SDP3_DATA; + E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext | E1000_CTRL_I2C_ENA); + + E1000_WRITE_FLUSH(hw); + + /* Read SFP module data */ + while (timeout) { + ret_val = e1000_read_sfp_data_byte(hw, + E1000_I2CCMD_SFP_DATA_ADDR(E1000_SFF_IDENTIFIER_OFFSET), + &tranceiver_type); + if (ret_val == E1000_SUCCESS) + break; + msec_delay(100); + timeout--; + } + if (ret_val != E1000_SUCCESS) + goto out; + + ret_val = e1000_read_sfp_data_byte(hw, + E1000_I2CCMD_SFP_DATA_ADDR(E1000_SFF_ETH_FLAGS_OFFSET), + (u8 *)eth_flags); + if (ret_val != E1000_SUCCESS) + goto out; + + /* Check if there is some SFP module plugged and powered */ + if ((tranceiver_type == E1000_SFF_IDENTIFIER_SFP) || + (tranceiver_type == E1000_SFF_IDENTIFIER_SFF)) { + dev_spec->module_plugged = true; + if (eth_flags->e1000_base_lx || eth_flags->e1000_base_sx) { + hw->phy.media_type = e1000_media_type_internal_serdes; + } else if (eth_flags->e100_base_fx) { + dev_spec->sgmii_active = true; + hw->phy.media_type = e1000_media_type_internal_serdes; + } else if (eth_flags->e1000_base_t) { + dev_spec->sgmii_active = true; + hw->phy.media_type = e1000_media_type_copper; + } else { + hw->phy.media_type = e1000_media_type_unknown; + DEBUGOUT("PHY module has not been recognized\n"); + goto out; + } + } else { + hw->phy.media_type = e1000_media_type_unknown; + } + ret_val = E1000_SUCCESS; +out: + /* Restore I2C interface setting */ + E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); + return ret_val; +} + +/** + * e1000_valid_led_default_82575 - Verify a valid default LED config + * @hw: pointer to the HW structure + * @data: pointer to the NVM (EEPROM) + * + * Read the EEPROM for the current default LED configuration. If the + * LED configuration is not valid, set to a valid LED configuration. + **/ +static s32 e1000_valid_led_default_82575(struct e1000_hw *hw, u16 *data) +{ + s32 ret_val; + + DEBUGFUNC("e1000_valid_led_default_82575"); + + ret_val = hw->nvm.ops.read(hw, NVM_ID_LED_SETTINGS, 1, data); + if (ret_val) { + DEBUGOUT("NVM Read Error\n"); + goto out; + } + + if (*data == ID_LED_RESERVED_0000 || *data == ID_LED_RESERVED_FFFF) { + switch (hw->phy.media_type) { + case e1000_media_type_internal_serdes: + *data = ID_LED_DEFAULT_82575_SERDES; + break; + case e1000_media_type_copper: + default: + *data = ID_LED_DEFAULT; + break; + } + } +out: + return ret_val; +} + +/** + * e1000_sgmii_active_82575 - Return sgmii state + * @hw: pointer to the HW structure + * + * 82575 silicon has a serialized gigabit media independent interface (sgmii) + * which can be enabled for use in the embedded applications. Simply + * return the current state of the sgmii interface. + **/ +static bool e1000_sgmii_active_82575(struct e1000_hw *hw) +{ + struct e1000_dev_spec_82575 *dev_spec = &hw->dev_spec._82575; + return dev_spec->sgmii_active; +} + +/** + * e1000_reset_init_script_82575 - Inits HW defaults after reset + * @hw: pointer to the HW structure + * + * Inits recommended HW defaults after a reset when there is no EEPROM + * detected. This is only for the 82575. + **/ +static s32 e1000_reset_init_script_82575(struct e1000_hw *hw) +{ + DEBUGFUNC("e1000_reset_init_script_82575"); + + if (hw->mac.type == e1000_82575) { + DEBUGOUT("Running reset init script for 82575\n"); + /* SerDes configuration via SERDESCTRL */ + e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCTL, 0x00, 0x0C); + e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCTL, 0x01, 0x78); + e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCTL, 0x1B, 0x23); + e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCTL, 0x23, 0x15); + + /* CCM configuration via CCMCTL register */ + e1000_write_8bit_ctrl_reg_generic(hw, E1000_CCMCTL, 0x14, 0x00); + e1000_write_8bit_ctrl_reg_generic(hw, E1000_CCMCTL, 0x10, 0x00); + + /* PCIe lanes configuration */ + e1000_write_8bit_ctrl_reg_generic(hw, E1000_GIOCTL, 0x00, 0xEC); + e1000_write_8bit_ctrl_reg_generic(hw, E1000_GIOCTL, 0x61, 0xDF); + e1000_write_8bit_ctrl_reg_generic(hw, E1000_GIOCTL, 0x34, 0x05); + e1000_write_8bit_ctrl_reg_generic(hw, E1000_GIOCTL, 0x2F, 0x81); + + /* PCIe PLL Configuration */ + e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCCTL, 0x02, 0x47); + e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCCTL, 0x14, 0x00); + e1000_write_8bit_ctrl_reg_generic(hw, E1000_SCCTL, 0x10, 0x00); + } + + return E1000_SUCCESS; +} + +/** + * e1000_read_mac_addr_82575 - Read device MAC address + * @hw: pointer to the HW structure + **/ +static s32 e1000_read_mac_addr_82575(struct e1000_hw *hw) +{ + s32 ret_val = E1000_SUCCESS; + + DEBUGFUNC("e1000_read_mac_addr_82575"); + + /* + * If there's an alternate MAC address place it in RAR0 + * so that it will override the Si installed default perm + * address. + */ + ret_val = e1000_check_alt_mac_addr_generic(hw); + if (ret_val) + goto out; + + ret_val = e1000_read_mac_addr_generic(hw); + +out: + return ret_val; +} + +/** + * e1000_config_collision_dist_82575 - Configure collision distance + * @hw: pointer to the HW structure + * + * Configures the collision distance to the default value and is used + * during link setup. + **/ +static void e1000_config_collision_dist_82575(struct e1000_hw *hw) +{ + u32 tctl_ext; + + DEBUGFUNC("e1000_config_collision_dist_82575"); + + tctl_ext = E1000_READ_REG(hw, E1000_TCTL_EXT); + + tctl_ext &= ~E1000_TCTL_EXT_COLD; + tctl_ext |= E1000_COLLISION_DISTANCE << E1000_TCTL_EXT_COLD_SHIFT; + + E1000_WRITE_REG(hw, E1000_TCTL_EXT, tctl_ext); + E1000_WRITE_FLUSH(hw); +} + +/** + * e1000_power_down_phy_copper_82575 - Remove link during PHY power down + * @hw: pointer to the HW structure + * + * In the case of a PHY power down to save power, or to turn off link during a + * driver unload, or wake on lan is not enabled, remove the link. + **/ +static void e1000_power_down_phy_copper_82575(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + + if (!(phy->ops.check_reset_block)) + return; + + /* If the management interface is not enabled, then power down */ + if (!(e1000_enable_mng_pass_thru(hw) || phy->ops.check_reset_block(hw))) + e1000_power_down_phy_copper(hw); + + return; +} + +/** + * e1000_clear_hw_cntrs_82575 - Clear device specific hardware counters + * @hw: pointer to the HW structure + * + * Clears the hardware counters by reading the counter registers. + **/ +static void e1000_clear_hw_cntrs_82575(struct e1000_hw *hw) +{ + DEBUGFUNC("e1000_clear_hw_cntrs_82575"); + + e1000_clear_hw_cntrs_base_generic(hw); + + E1000_READ_REG(hw, E1000_PRC64); + E1000_READ_REG(hw, E1000_PRC127); + E1000_READ_REG(hw, E1000_PRC255); + E1000_READ_REG(hw, E1000_PRC511); + E1000_READ_REG(hw, E1000_PRC1023); + E1000_READ_REG(hw, E1000_PRC1522); + E1000_READ_REG(hw, E1000_PTC64); + E1000_READ_REG(hw, E1000_PTC127); + E1000_READ_REG(hw, E1000_PTC255); + E1000_READ_REG(hw, E1000_PTC511); + E1000_READ_REG(hw, E1000_PTC1023); + E1000_READ_REG(hw, E1000_PTC1522); + + E1000_READ_REG(hw, E1000_ALGNERRC); + E1000_READ_REG(hw, E1000_RXERRC); + E1000_READ_REG(hw, E1000_TNCRS); + E1000_READ_REG(hw, E1000_CEXTERR); + E1000_READ_REG(hw, E1000_TSCTC); + E1000_READ_REG(hw, E1000_TSCTFC); + + E1000_READ_REG(hw, E1000_MGTPRC); + E1000_READ_REG(hw, E1000_MGTPDC); + E1000_READ_REG(hw, E1000_MGTPTC); + + E1000_READ_REG(hw, E1000_IAC); + E1000_READ_REG(hw, E1000_ICRXOC); + + E1000_READ_REG(hw, E1000_ICRXPTC); + E1000_READ_REG(hw, E1000_ICRXATC); + E1000_READ_REG(hw, E1000_ICTXPTC); + E1000_READ_REG(hw, E1000_ICTXATC); + E1000_READ_REG(hw, E1000_ICTXQEC); + E1000_READ_REG(hw, E1000_ICTXQMTC); + E1000_READ_REG(hw, E1000_ICRXDMTC); + + E1000_READ_REG(hw, E1000_CBTMPC); + E1000_READ_REG(hw, E1000_HTDPMC); + E1000_READ_REG(hw, E1000_CBRMPC); + E1000_READ_REG(hw, E1000_RPTHC); + E1000_READ_REG(hw, E1000_HGPTC); + E1000_READ_REG(hw, E1000_HTCBDPC); + E1000_READ_REG(hw, E1000_HGORCL); + E1000_READ_REG(hw, E1000_HGORCH); + E1000_READ_REG(hw, E1000_HGOTCL); + E1000_READ_REG(hw, E1000_HGOTCH); + E1000_READ_REG(hw, E1000_LENERRS); + + /* This register should not be read in copper configurations */ + if ((hw->phy.media_type == e1000_media_type_internal_serdes) || + e1000_sgmii_active_82575(hw)) + E1000_READ_REG(hw, E1000_SCVPC); +} + +/** + * e1000_rx_fifo_flush_82575 - Clean rx fifo after Rx enable + * @hw: pointer to the HW structure + * + * After rx enable if managability is enabled then there is likely some + * bad data at the start of the fifo and possibly in the DMA fifo. This + * function clears the fifos and flushes any packets that came in as rx was + * being enabled. + **/ +void e1000_rx_fifo_flush_82575(struct e1000_hw *hw) +{ + u32 rctl, rlpml, rxdctl[4], rfctl, temp_rctl, rx_enabled; + int i, ms_wait; + + DEBUGFUNC("e1000_rx_fifo_workaround_82575"); + if (hw->mac.type != e1000_82575 || + !(E1000_READ_REG(hw, E1000_MANC) & E1000_MANC_RCV_TCO_EN)) + return; + + /* Disable all Rx queues */ + for (i = 0; i < 4; i++) { + rxdctl[i] = E1000_READ_REG(hw, E1000_RXDCTL(i)); + E1000_WRITE_REG(hw, E1000_RXDCTL(i), + rxdctl[i] & ~E1000_RXDCTL_QUEUE_ENABLE); + } + /* Poll all queues to verify they have shut down */ + for (ms_wait = 0; ms_wait < 10; ms_wait++) { + msec_delay(1); + rx_enabled = 0; + for (i = 0; i < 4; i++) + rx_enabled |= E1000_READ_REG(hw, E1000_RXDCTL(i)); + if (!(rx_enabled & E1000_RXDCTL_QUEUE_ENABLE)) + break; + } + + if (ms_wait == 10) + DEBUGOUT("Queue disable timed out after 10ms\n"); + + /* Clear RLPML, RCTL.SBP, RFCTL.LEF, and set RCTL.LPE so that all + * incoming packets are rejected. Set enable and wait 2ms so that + * any packet that was coming in as RCTL.EN was set is flushed + */ + rfctl = E1000_READ_REG(hw, E1000_RFCTL); + E1000_WRITE_REG(hw, E1000_RFCTL, rfctl & ~E1000_RFCTL_LEF); + + rlpml = E1000_READ_REG(hw, E1000_RLPML); + E1000_WRITE_REG(hw, E1000_RLPML, 0); + + rctl = E1000_READ_REG(hw, E1000_RCTL); + temp_rctl = rctl & ~(E1000_RCTL_EN | E1000_RCTL_SBP); + temp_rctl |= E1000_RCTL_LPE; + + E1000_WRITE_REG(hw, E1000_RCTL, temp_rctl); + E1000_WRITE_REG(hw, E1000_RCTL, temp_rctl | E1000_RCTL_EN); + E1000_WRITE_FLUSH(hw); + msec_delay(2); + + /* Enable Rx queues that were previously enabled and restore our + * previous state + */ + for (i = 0; i < 4; i++) + E1000_WRITE_REG(hw, E1000_RXDCTL(i), rxdctl[i]); + E1000_WRITE_REG(hw, E1000_RCTL, rctl); + E1000_WRITE_FLUSH(hw); + + E1000_WRITE_REG(hw, E1000_RLPML, rlpml); + E1000_WRITE_REG(hw, E1000_RFCTL, rfctl); + + /* Flush receive errors generated by workaround */ + E1000_READ_REG(hw, E1000_ROC); + E1000_READ_REG(hw, E1000_RNBC); + E1000_READ_REG(hw, E1000_MPC); +} + +/** + * e1000_set_pcie_completion_timeout - set pci-e completion timeout + * @hw: pointer to the HW structure + * + * The defaults for 82575 and 82576 should be in the range of 50us to 50ms, + * however the hardware default for these parts is 500us to 1ms which is less + * than the 10ms recommended by the pci-e spec. To address this we need to + * increase the value to either 10ms to 200ms for capability version 1 config, + * or 16ms to 55ms for version 2. + **/ +static s32 e1000_set_pcie_completion_timeout(struct e1000_hw *hw) +{ + u32 gcr = E1000_READ_REG(hw, E1000_GCR); + s32 ret_val = E1000_SUCCESS; + u16 pcie_devctl2; + + /* only take action if timeout value is defaulted to 0 */ + if (gcr & E1000_GCR_CMPL_TMOUT_MASK) + goto out; + + /* + * if capababilities version is type 1 we can write the + * timeout of 10ms to 200ms through the GCR register + */ + if (!(gcr & E1000_GCR_CAP_VER2)) { + gcr |= E1000_GCR_CMPL_TMOUT_10ms; + goto out; + } + + /* + * for version 2 capabilities we need to write the config space + * directly in order to set the completion timeout value for + * 16ms to 55ms + */ + ret_val = e1000_read_pcie_cap_reg(hw, PCIE_DEVICE_CONTROL2, + &pcie_devctl2); + if (ret_val) + goto out; + + pcie_devctl2 |= PCIE_DEVICE_CONTROL2_16ms; + + ret_val = e1000_write_pcie_cap_reg(hw, PCIE_DEVICE_CONTROL2, + &pcie_devctl2); +out: + /* disable completion timeout resend */ + gcr &= ~E1000_GCR_CMPL_TMOUT_RESEND; + + E1000_WRITE_REG(hw, E1000_GCR, gcr); + return ret_val; +} + +/** + * e1000_vmdq_set_anti_spoofing_pf - enable or disable anti-spoofing + * @hw: pointer to the hardware struct + * @enable: state to enter, either enabled or disabled + * @pf: Physical Function pool - do not set anti-spoofing for the PF + * + * enables/disables L2 switch anti-spoofing functionality. + **/ +void e1000_vmdq_set_anti_spoofing_pf(struct e1000_hw *hw, bool enable, int pf) +{ + u32 reg_val, reg_offset; + + switch (hw->mac.type) { + case e1000_82576: + reg_offset = E1000_DTXSWC; + break; + case e1000_i350: + case e1000_i354: + reg_offset = E1000_TXSWC; + break; + default: + return; + } + + reg_val = E1000_READ_REG(hw, reg_offset); + if (enable) { + reg_val |= (E1000_DTXSWC_MAC_SPOOF_MASK | + E1000_DTXSWC_VLAN_SPOOF_MASK); + /* The PF can spoof - it has to in order to + * support emulation mode NICs + */ + reg_val ^= (1 << pf | 1 << (pf + MAX_NUM_VFS)); + } else { + reg_val &= ~(E1000_DTXSWC_MAC_SPOOF_MASK | + E1000_DTXSWC_VLAN_SPOOF_MASK); + } + E1000_WRITE_REG(hw, reg_offset, reg_val); +} + +/** + * e1000_vmdq_set_loopback_pf - enable or disable vmdq loopback + * @hw: pointer to the hardware struct + * @enable: state to enter, either enabled or disabled + * + * enables/disables L2 switch loopback functionality. + **/ +void e1000_vmdq_set_loopback_pf(struct e1000_hw *hw, bool enable) +{ + u32 dtxswc; + + switch (hw->mac.type) { + case e1000_82576: + dtxswc = E1000_READ_REG(hw, E1000_DTXSWC); + if (enable) + dtxswc |= E1000_DTXSWC_VMDQ_LOOPBACK_EN; + else + dtxswc &= ~E1000_DTXSWC_VMDQ_LOOPBACK_EN; + E1000_WRITE_REG(hw, E1000_DTXSWC, dtxswc); + break; + case e1000_i350: + case e1000_i354: + dtxswc = E1000_READ_REG(hw, E1000_TXSWC); + if (enable) + dtxswc |= E1000_DTXSWC_VMDQ_LOOPBACK_EN; + else + dtxswc &= ~E1000_DTXSWC_VMDQ_LOOPBACK_EN; + E1000_WRITE_REG(hw, E1000_TXSWC, dtxswc); + break; + default: + /* Currently no other hardware supports loopback */ + break; + } + + +} + +/** + * e1000_vmdq_set_replication_pf - enable or disable vmdq replication + * @hw: pointer to the hardware struct + * @enable: state to enter, either enabled or disabled + * + * enables/disables replication of packets across multiple pools. + **/ +void e1000_vmdq_set_replication_pf(struct e1000_hw *hw, bool enable) +{ + u32 vt_ctl = E1000_READ_REG(hw, E1000_VT_CTL); + + if (enable) + vt_ctl |= E1000_VT_CTL_VM_REPL_EN; + else + vt_ctl &= ~E1000_VT_CTL_VM_REPL_EN; + + E1000_WRITE_REG(hw, E1000_VT_CTL, vt_ctl); +} + +/** + * e1000_read_phy_reg_82580 - Read 82580 MDI control register + * @hw: pointer to the HW structure + * @offset: register offset to be read + * @data: pointer to the read data + * + * Reads the MDI control register in the PHY at offset and stores the + * information read to data. + **/ +static s32 e1000_read_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 *data) +{ + s32 ret_val; + + DEBUGFUNC("e1000_read_phy_reg_82580"); + + ret_val = hw->phy.ops.acquire(hw); + if (ret_val) + goto out; + + ret_val = e1000_read_phy_reg_mdic(hw, offset, data); + + hw->phy.ops.release(hw); + +out: + return ret_val; +} + +/** + * e1000_write_phy_reg_82580 - Write 82580 MDI control register + * @hw: pointer to the HW structure + * @offset: register offset to write to + * @data: data to write to register at offset + * + * Writes data to MDI control register in the PHY at offset. + **/ +static s32 e1000_write_phy_reg_82580(struct e1000_hw *hw, u32 offset, u16 data) +{ + s32 ret_val; + + DEBUGFUNC("e1000_write_phy_reg_82580"); + + ret_val = hw->phy.ops.acquire(hw); + if (ret_val) + goto out; + + ret_val = e1000_write_phy_reg_mdic(hw, offset, data); + + hw->phy.ops.release(hw); + +out: + return ret_val; +} + +/** + * e1000_reset_mdicnfg_82580 - Reset MDICNFG destination and com_mdio bits + * @hw: pointer to the HW structure + * + * This resets the the MDICNFG.Destination and MDICNFG.Com_MDIO bits based on + * the values found in the EEPROM. This addresses an issue in which these + * bits are not restored from EEPROM after reset. + **/ +static s32 e1000_reset_mdicnfg_82580(struct e1000_hw *hw) +{ + s32 ret_val = E1000_SUCCESS; + u32 mdicnfg; + u16 nvm_data = 0; + + DEBUGFUNC("e1000_reset_mdicnfg_82580"); + + if (hw->mac.type != e1000_82580) + goto out; + if (!e1000_sgmii_active_82575(hw)) + goto out; + + ret_val = hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A + + NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1, + &nvm_data); + if (ret_val) { + DEBUGOUT("NVM Read Error\n"); + goto out; + } + + mdicnfg = E1000_READ_REG(hw, E1000_MDICNFG); + if (nvm_data & NVM_WORD24_EXT_MDIO) + mdicnfg |= E1000_MDICNFG_EXT_MDIO; + if (nvm_data & NVM_WORD24_COM_MDIO) + mdicnfg |= E1000_MDICNFG_COM_MDIO; + E1000_WRITE_REG(hw, E1000_MDICNFG, mdicnfg); +out: + return ret_val; +} + +/** + * e1000_reset_hw_82580 - Reset hardware + * @hw: pointer to the HW structure + * + * This resets function or entire device (all ports, etc.) + * to a known state. + **/ +static s32 e1000_reset_hw_82580(struct e1000_hw *hw) +{ + s32 ret_val = E1000_SUCCESS; + /* BH SW mailbox bit in SW_FW_SYNC */ + u16 swmbsw_mask = E1000_SW_SYNCH_MB; + u32 ctrl; + bool global_device_reset = hw->dev_spec._82575.global_device_reset; + + DEBUGFUNC("e1000_reset_hw_82580"); + + hw->dev_spec._82575.global_device_reset = false; + + /* 82580 does not reliably do global_device_reset due to hw errata */ + if (hw->mac.type == e1000_82580) + global_device_reset = false; + + /* Get current control state. */ + ctrl = E1000_READ_REG(hw, E1000_CTRL); + + /* + * Prevent the PCI-E bus from sticking if there is no TLP connection + * on the last TLP read/write transaction when MAC is reset. + */ + ret_val = e1000_disable_pcie_master_generic(hw); + if (ret_val) + DEBUGOUT("PCI-E Master disable polling has failed.\n"); + + DEBUGOUT("Masking off all interrupts\n"); + E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); + E1000_WRITE_REG(hw, E1000_RCTL, 0); + E1000_WRITE_REG(hw, E1000_TCTL, E1000_TCTL_PSP); + E1000_WRITE_FLUSH(hw); + + msec_delay(10); + + /* Determine whether or not a global dev reset is requested */ + if (global_device_reset && hw->mac.ops.acquire_swfw_sync(hw, + swmbsw_mask)) + global_device_reset = false; + + if (global_device_reset && !(E1000_READ_REG(hw, E1000_STATUS) & + E1000_STAT_DEV_RST_SET)) + ctrl |= E1000_CTRL_DEV_RST; + else + ctrl |= E1000_CTRL_RST; + + E1000_WRITE_REG(hw, E1000_CTRL, ctrl); + E1000_WRITE_FLUSH(hw); + + /* Add delay to insure DEV_RST has time to complete */ + if (global_device_reset) + msec_delay(5); + + ret_val = e1000_get_auto_rd_done_generic(hw); + if (ret_val) { + /* + * When auto config read does not complete, do not + * return with an error. This can happen in situations + * where there is no eeprom and prevents getting link. + */ + DEBUGOUT("Auto Read Done did not complete\n"); + } + + /* clear global device reset status bit */ + E1000_WRITE_REG(hw, E1000_STATUS, E1000_STAT_DEV_RST_SET); + + /* Clear any pending interrupt events. */ + E1000_WRITE_REG(hw, E1000_IMC, 0xffffffff); + E1000_READ_REG(hw, E1000_ICR); + + ret_val = e1000_reset_mdicnfg_82580(hw); + if (ret_val) + DEBUGOUT("Could not reset MDICNFG based on EEPROM\n"); + + /* Install any alternate MAC address into RAR0 */ + ret_val = e1000_check_alt_mac_addr_generic(hw); + + /* Release semaphore */ + if (global_device_reset) + hw->mac.ops.release_swfw_sync(hw, swmbsw_mask); + + return ret_val; +} + +/** + * e1000_rxpbs_adjust_82580 - adjust RXPBS value to reflect actual Rx PBA size + * @data: data received by reading RXPBS register + * + * The 82580 uses a table based approach for packet buffer allocation sizes. + * This function converts the retrieved value into the correct table value + * 0x0 0x1 0x2 0x3 0x4 0x5 0x6 0x7 + * 0x0 36 72 144 1 2 4 8 16 + * 0x8 35 70 140 rsv rsv rsv rsv rsv + */ +u16 e1000_rxpbs_adjust_82580(u32 data) +{ + u16 ret_val = 0; + + if (data < E1000_82580_RXPBS_TABLE_SIZE) + ret_val = e1000_82580_rxpbs_table[data]; + + return ret_val; +} + +/** + * e1000_validate_nvm_checksum_with_offset - Validate EEPROM + * checksum + * @hw: pointer to the HW structure + * @offset: offset in words of the checksum protected region + * + * Calculates the EEPROM checksum by reading/adding each word of the EEPROM + * and then verifies that the sum of the EEPROM is equal to 0xBABA. + **/ +s32 e1000_validate_nvm_checksum_with_offset(struct e1000_hw *hw, u16 offset) +{ + s32 ret_val = E1000_SUCCESS; + u16 checksum = 0; + u16 i, nvm_data; + + DEBUGFUNC("e1000_validate_nvm_checksum_with_offset"); + + for (i = offset; i < ((NVM_CHECKSUM_REG + offset) + 1); i++) { + ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data); + if (ret_val) { + DEBUGOUT("NVM Read Error\n"); + goto out; + } + checksum += nvm_data; + } + + if (checksum != (u16) NVM_SUM) { + DEBUGOUT("NVM Checksum Invalid\n"); + ret_val = -E1000_ERR_NVM; + goto out; + } + +out: + return ret_val; +} + +/** + * e1000_update_nvm_checksum_with_offset - Update EEPROM + * checksum + * @hw: pointer to the HW structure + * @offset: offset in words of the checksum protected region + * + * Updates the EEPROM checksum by reading/adding each word of the EEPROM + * up to the checksum. Then calculates the EEPROM checksum and writes the + * value to the EEPROM. + **/ +s32 e1000_update_nvm_checksum_with_offset(struct e1000_hw *hw, u16 offset) +{ + s32 ret_val; + u16 checksum = 0; + u16 i, nvm_data; + + DEBUGFUNC("e1000_update_nvm_checksum_with_offset"); + + for (i = offset; i < (NVM_CHECKSUM_REG + offset); i++) { + ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data); + if (ret_val) { + DEBUGOUT("NVM Read Error while updating checksum.\n"); + goto out; + } + checksum += nvm_data; + } + checksum = (u16) NVM_SUM - checksum; + ret_val = hw->nvm.ops.write(hw, (NVM_CHECKSUM_REG + offset), 1, + &checksum); + if (ret_val) + DEBUGOUT("NVM Write Error while updating checksum.\n"); + +out: + return ret_val; +} + +/** + * e1000_validate_nvm_checksum_82580 - Validate EEPROM checksum + * @hw: pointer to the HW structure + * + * Calculates the EEPROM section checksum by reading/adding each word of + * the EEPROM and then verifies that the sum of the EEPROM is + * equal to 0xBABA. + **/ +static s32 e1000_validate_nvm_checksum_82580(struct e1000_hw *hw) +{ + s32 ret_val = E1000_SUCCESS; + u16 eeprom_regions_count = 1; + u16 j, nvm_data; + u16 nvm_offset; + + DEBUGFUNC("e1000_validate_nvm_checksum_82580"); + + ret_val = hw->nvm.ops.read(hw, NVM_COMPATIBILITY_REG_3, 1, &nvm_data); + if (ret_val) { + DEBUGOUT("NVM Read Error\n"); + goto out; + } + + if (nvm_data & NVM_COMPATIBILITY_BIT_MASK) { + /* if chekcsums compatibility bit is set validate checksums + * for all 4 ports. */ + eeprom_regions_count = 4; + } + + for (j = 0; j < eeprom_regions_count; j++) { + nvm_offset = NVM_82580_LAN_FUNC_OFFSET(j); + ret_val = e1000_validate_nvm_checksum_with_offset(hw, + nvm_offset); + if (ret_val != E1000_SUCCESS) + goto out; + } + +out: + return ret_val; +} + +/** + * e1000_update_nvm_checksum_82580 - Update EEPROM checksum + * @hw: pointer to the HW structure + * + * Updates the EEPROM section checksums for all 4 ports by reading/adding + * each word of the EEPROM up to the checksum. Then calculates the EEPROM + * checksum and writes the value to the EEPROM. + **/ +static s32 e1000_update_nvm_checksum_82580(struct e1000_hw *hw) +{ + s32 ret_val; + u16 j, nvm_data; + u16 nvm_offset; + + DEBUGFUNC("e1000_update_nvm_checksum_82580"); + + ret_val = hw->nvm.ops.read(hw, NVM_COMPATIBILITY_REG_3, 1, &nvm_data); + if (ret_val) { + DEBUGOUT("NVM Read Error while updating checksum compatibility bit.\n"); + goto out; + } + + if (!(nvm_data & NVM_COMPATIBILITY_BIT_MASK)) { + /* set compatibility bit to validate checksums appropriately */ + nvm_data = nvm_data | NVM_COMPATIBILITY_BIT_MASK; + ret_val = hw->nvm.ops.write(hw, NVM_COMPATIBILITY_REG_3, 1, + &nvm_data); + if (ret_val) { + DEBUGOUT("NVM Write Error while updating checksum compatibility bit.\n"); + goto out; + } + } + + for (j = 0; j < 4; j++) { + nvm_offset = NVM_82580_LAN_FUNC_OFFSET(j); + ret_val = e1000_update_nvm_checksum_with_offset(hw, nvm_offset); + if (ret_val) + goto out; + } + +out: + return ret_val; +} + +/** + * e1000_validate_nvm_checksum_i350 - Validate EEPROM checksum + * @hw: pointer to the HW structure + * + * Calculates the EEPROM section checksum by reading/adding each word of + * the EEPROM and then verifies that the sum of the EEPROM is + * equal to 0xBABA. + **/ +static s32 e1000_validate_nvm_checksum_i350(struct e1000_hw *hw) +{ + s32 ret_val = E1000_SUCCESS; + u16 j; + u16 nvm_offset; + + DEBUGFUNC("e1000_validate_nvm_checksum_i350"); + + for (j = 0; j < 4; j++) { + nvm_offset = NVM_82580_LAN_FUNC_OFFSET(j); + ret_val = e1000_validate_nvm_checksum_with_offset(hw, + nvm_offset); + if (ret_val != E1000_SUCCESS) + goto out; + } + +out: + return ret_val; +} + +/** + * e1000_update_nvm_checksum_i350 - Update EEPROM checksum + * @hw: pointer to the HW structure + * + * Updates the EEPROM section checksums for all 4 ports by reading/adding + * each word of the EEPROM up to the checksum. Then calculates the EEPROM + * checksum and writes the value to the EEPROM. + **/ +static s32 e1000_update_nvm_checksum_i350(struct e1000_hw *hw) +{ + s32 ret_val = E1000_SUCCESS; + u16 j; + u16 nvm_offset; + + DEBUGFUNC("e1000_update_nvm_checksum_i350"); + + for (j = 0; j < 4; j++) { + nvm_offset = NVM_82580_LAN_FUNC_OFFSET(j); + ret_val = e1000_update_nvm_checksum_with_offset(hw, nvm_offset); + if (ret_val != E1000_SUCCESS) + goto out; + } + +out: + return ret_val; +} + +/** + * __e1000_access_emi_reg - Read/write EMI register + * @hw: pointer to the HW structure + * @addr: EMI address to program + * @data: pointer to value to read/write from/to the EMI address + * @read: boolean flag to indicate read or write + **/ +static s32 __e1000_access_emi_reg(struct e1000_hw *hw, u16 address, + u16 *data, bool read) +{ + s32 ret_val = E1000_SUCCESS; + + DEBUGFUNC("__e1000_access_emi_reg"); + + ret_val = hw->phy.ops.write_reg(hw, E1000_EMIADD, address); + if (ret_val) + return ret_val; + + if (read) + ret_val = hw->phy.ops.read_reg(hw, E1000_EMIDATA, data); + else + ret_val = hw->phy.ops.write_reg(hw, E1000_EMIDATA, *data); + + return ret_val; +} + +/** + * e1000_read_emi_reg - Read Extended Management Interface register + * @hw: pointer to the HW structure + * @addr: EMI address to program + * @data: value to be read from the EMI address + **/ +s32 e1000_read_emi_reg(struct e1000_hw *hw, u16 addr, u16 *data) +{ + DEBUGFUNC("e1000_read_emi_reg"); + + return __e1000_access_emi_reg(hw, addr, data, true); +} + +/** + * e1000_set_eee_i350 - Enable/disable EEE support + * @hw: pointer to the HW structure + * + * Enable/disable EEE based on setting in dev_spec structure. + * + **/ +s32 e1000_set_eee_i350(struct e1000_hw *hw) +{ + s32 ret_val = E1000_SUCCESS; + u32 ipcnfg, eeer; + + DEBUGFUNC("e1000_set_eee_i350"); + + if ((hw->mac.type < e1000_i350) || + (hw->phy.media_type != e1000_media_type_copper)) + goto out; + ipcnfg = E1000_READ_REG(hw, E1000_IPCNFG); + eeer = E1000_READ_REG(hw, E1000_EEER); + + /* enable or disable per user setting */ + if (!(hw->dev_spec._82575.eee_disable)) { + u32 eee_su = E1000_READ_REG(hw, E1000_EEE_SU); + + ipcnfg |= (E1000_IPCNFG_EEE_1G_AN | E1000_IPCNFG_EEE_100M_AN); + eeer |= (E1000_EEER_TX_LPI_EN | E1000_EEER_RX_LPI_EN | + E1000_EEER_LPI_FC); + + /* This bit should not be set in normal operation. */ + if (eee_su & E1000_EEE_SU_LPI_CLK_STP) + DEBUGOUT("LPI Clock Stop Bit should not be set!\n"); + } else { + ipcnfg &= ~(E1000_IPCNFG_EEE_1G_AN | E1000_IPCNFG_EEE_100M_AN); + eeer &= ~(E1000_EEER_TX_LPI_EN | E1000_EEER_RX_LPI_EN | + E1000_EEER_LPI_FC); + } + E1000_WRITE_REG(hw, E1000_IPCNFG, ipcnfg); + E1000_WRITE_REG(hw, E1000_EEER, eeer); + E1000_READ_REG(hw, E1000_IPCNFG); + E1000_READ_REG(hw, E1000_EEER); +out: + + return ret_val; +} + +/** + * e1000_set_eee_i354 - Enable/disable EEE support + * @hw: pointer to the HW structure + * + * Enable/disable EEE legacy mode based on setting in dev_spec structure. + * + **/ +s32 e1000_set_eee_i354(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val = E1000_SUCCESS; + u16 phy_data; + + DEBUGFUNC("e1000_set_eee_i354"); + + if ((hw->phy.media_type != e1000_media_type_copper) || + ((phy->id != M88E1543_E_PHY_ID))) + goto out; + + if (!hw->dev_spec._82575.eee_disable) { + /* Switch to PHY page 18. */ + ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 18); + if (ret_val) + goto out; + + ret_val = phy->ops.read_reg(hw, E1000_M88E1543_EEE_CTRL_1, + &phy_data); + if (ret_val) + goto out; + + phy_data |= E1000_M88E1543_EEE_CTRL_1_MS; + ret_val = phy->ops.write_reg(hw, E1000_M88E1543_EEE_CTRL_1, + phy_data); + if (ret_val) + goto out; + + /* Return the PHY to page 0. */ + ret_val = phy->ops.write_reg(hw, E1000_M88E1543_PAGE_ADDR, 0); + if (ret_val) + goto out; + + /* Turn on EEE advertisement. */ + ret_val = e1000_read_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354, + E1000_EEE_ADV_DEV_I354, + &phy_data); + if (ret_val) + goto out; + + phy_data |= E1000_EEE_ADV_100_SUPPORTED | + E1000_EEE_ADV_1000_SUPPORTED; + ret_val = e1000_write_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354, + E1000_EEE_ADV_DEV_I354, + phy_data); + } else { + /* Turn off EEE advertisement. */ + ret_val = e1000_read_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354, + E1000_EEE_ADV_DEV_I354, + &phy_data); + if (ret_val) + goto out; + + phy_data &= ~(E1000_EEE_ADV_100_SUPPORTED | + E1000_EEE_ADV_1000_SUPPORTED); + ret_val = e1000_write_xmdio_reg(hw, E1000_EEE_ADV_ADDR_I354, + E1000_EEE_ADV_DEV_I354, + phy_data); + } + +out: + return ret_val; +} + +/** + * e1000_get_eee_status_i354 - Get EEE status + * @hw: pointer to the HW structure + * @status: EEE status + * + * Get EEE status by guessing based on whether Tx or Rx LPI indications have + * been received. + **/ +s32 e1000_get_eee_status_i354(struct e1000_hw *hw, bool *status) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val = E1000_SUCCESS; + u16 phy_data; + + DEBUGFUNC("e1000_get_eee_status_i354"); + + /* Check if EEE is supported on this device. */ + if ((hw->phy.media_type != e1000_media_type_copper) || + ((phy->id != M88E1543_E_PHY_ID))) + goto out; + + ret_val = e1000_read_xmdio_reg(hw, E1000_PCS_STATUS_ADDR_I354, + E1000_PCS_STATUS_DEV_I354, + &phy_data); + if (ret_val) + goto out; + + *status = phy_data & (E1000_PCS_STATUS_TX_LPI_RCVD | + E1000_PCS_STATUS_RX_LPI_RCVD) ? true : false; + +out: + return ret_val; +} + +/* Due to a hw errata, if the host tries to configure the VFTA register + * while performing queries from the BMC or DMA, then the VFTA in some + * cases won't be written. + */ + +/** + * e1000_clear_vfta_i350 - Clear VLAN filter table + * @hw: pointer to the HW structure + * + * Clears the register array which contains the VLAN filter table by + * setting all the values to 0. + **/ +void e1000_clear_vfta_i350(struct e1000_hw *hw) +{ + u32 offset; + int i; + + DEBUGFUNC("e1000_clear_vfta_350"); + + for (offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++) { + for (i = 0; i < 10; i++) + E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, 0); + + E1000_WRITE_FLUSH(hw); + } +} + +/** + * e1000_write_vfta_i350 - Write value to VLAN filter table + * @hw: pointer to the HW structure + * @offset: register offset in VLAN filter table + * @value: register value written to VLAN filter table + * + * Writes value at the given offset in the register array which stores + * the VLAN filter table. + **/ +void e1000_write_vfta_i350(struct e1000_hw *hw, u32 offset, u32 value) +{ + int i; + + DEBUGFUNC("e1000_write_vfta_350"); + + for (i = 0; i < 10; i++) + E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, value); + + E1000_WRITE_FLUSH(hw); +} + + +/** + * e1000_set_i2c_bb - Enable I2C bit-bang + * @hw: pointer to the HW structure + * + * Enable I2C bit-bang interface + * + **/ +s32 e1000_set_i2c_bb(struct e1000_hw *hw) +{ + s32 ret_val = E1000_SUCCESS; + u32 ctrl_ext, i2cparams; + + DEBUGFUNC("e1000_set_i2c_bb"); + + ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); + ctrl_ext |= E1000_CTRL_I2C_ENA; + E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); + E1000_WRITE_FLUSH(hw); + + i2cparams = E1000_READ_REG(hw, E1000_I2CPARAMS); + i2cparams |= E1000_I2CBB_EN; + i2cparams |= E1000_I2C_DATA_OE_N; + i2cparams |= E1000_I2C_CLK_OE_N; + E1000_WRITE_REG(hw, E1000_I2CPARAMS, i2cparams); + E1000_WRITE_FLUSH(hw); + + return ret_val; +} + +/** + * e1000_read_i2c_byte_generic - Reads 8 bit word over I2C + * @hw: pointer to hardware structure + * @byte_offset: byte offset to read + * @dev_addr: device address + * @data: value read + * + * Performs byte read operation over I2C interface at + * a specified device address. + **/ +s32 e1000_read_i2c_byte_generic(struct e1000_hw *hw, u8 byte_offset, + u8 dev_addr, u8 *data) +{ + s32 status = E1000_SUCCESS; + u32 max_retry = 10; + u32 retry = 1; + u16 swfw_mask = 0; + + bool nack = true; + + DEBUGFUNC("e1000_read_i2c_byte_generic"); + + swfw_mask = E1000_SWFW_PHY0_SM; + + do { + if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask) + != E1000_SUCCESS) { + status = E1000_ERR_SWFW_SYNC; + goto read_byte_out; + } + + e1000_i2c_start(hw); + + /* Device Address and write indication */ + status = e1000_clock_out_i2c_byte(hw, dev_addr); + if (status != E1000_SUCCESS) + goto fail; + + status = e1000_get_i2c_ack(hw); + if (status != E1000_SUCCESS) + goto fail; + + status = e1000_clock_out_i2c_byte(hw, byte_offset); + if (status != E1000_SUCCESS) + goto fail; + + status = e1000_get_i2c_ack(hw); + if (status != E1000_SUCCESS) + goto fail; + + e1000_i2c_start(hw); + + /* Device Address and read indication */ + status = e1000_clock_out_i2c_byte(hw, (dev_addr | 0x1)); + if (status != E1000_SUCCESS) + goto fail; + + status = e1000_get_i2c_ack(hw); + if (status != E1000_SUCCESS) + goto fail; + + status = e1000_clock_in_i2c_byte(hw, data); + if (status != E1000_SUCCESS) + goto fail; + + status = e1000_clock_out_i2c_bit(hw, nack); + if (status != E1000_SUCCESS) + goto fail; + + e1000_i2c_stop(hw); + break; + +fail: + hw->mac.ops.release_swfw_sync(hw, swfw_mask); + msec_delay(100); + e1000_i2c_bus_clear(hw); + retry++; + if (retry < max_retry) + DEBUGOUT("I2C byte read error - Retrying.\n"); + else + DEBUGOUT("I2C byte read error.\n"); + + } while (retry < max_retry); + + hw->mac.ops.release_swfw_sync(hw, swfw_mask); + +read_byte_out: + + return status; +} + +/** + * e1000_write_i2c_byte_generic - Writes 8 bit word over I2C + * @hw: pointer to hardware structure + * @byte_offset: byte offset to write + * @dev_addr: device address + * @data: value to write + * + * Performs byte write operation over I2C interface at + * a specified device address. + **/ +s32 e1000_write_i2c_byte_generic(struct e1000_hw *hw, u8 byte_offset, + u8 dev_addr, u8 data) +{ + s32 status = E1000_SUCCESS; + u32 max_retry = 1; + u32 retry = 0; + u16 swfw_mask = 0; + + DEBUGFUNC("e1000_write_i2c_byte_generic"); + + swfw_mask = E1000_SWFW_PHY0_SM; + + if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask) != E1000_SUCCESS) { + status = E1000_ERR_SWFW_SYNC; + goto write_byte_out; + } + + do { + e1000_i2c_start(hw); + + status = e1000_clock_out_i2c_byte(hw, dev_addr); + if (status != E1000_SUCCESS) + goto fail; + + status = e1000_get_i2c_ack(hw); + if (status != E1000_SUCCESS) + goto fail; + + status = e1000_clock_out_i2c_byte(hw, byte_offset); + if (status != E1000_SUCCESS) + goto fail; + + status = e1000_get_i2c_ack(hw); + if (status != E1000_SUCCESS) + goto fail; + + status = e1000_clock_out_i2c_byte(hw, data); + if (status != E1000_SUCCESS) + goto fail; + + status = e1000_get_i2c_ack(hw); + if (status != E1000_SUCCESS) + goto fail; + + e1000_i2c_stop(hw); + break; + +fail: + e1000_i2c_bus_clear(hw); + retry++; + if (retry < max_retry) + DEBUGOUT("I2C byte write error - Retrying.\n"); + else + DEBUGOUT("I2C byte write error.\n"); + } while (retry < max_retry); + + hw->mac.ops.release_swfw_sync(hw, swfw_mask); + +write_byte_out: + + return status; +} + +/** + * e1000_i2c_start - Sets I2C start condition + * @hw: pointer to hardware structure + * + * Sets I2C start condition (High -> Low on SDA while SCL is High) + **/ +static void e1000_i2c_start(struct e1000_hw *hw) +{ + u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); + + DEBUGFUNC("e1000_i2c_start"); + + /* Start condition must begin with data and clock high */ + e1000_set_i2c_data(hw, &i2cctl, 1); + e1000_raise_i2c_clk(hw, &i2cctl); + + /* Setup time for start condition (4.7us) */ + usec_delay(E1000_I2C_T_SU_STA); + + e1000_set_i2c_data(hw, &i2cctl, 0); + + /* Hold time for start condition (4us) */ + usec_delay(E1000_I2C_T_HD_STA); + + e1000_lower_i2c_clk(hw, &i2cctl); + + /* Minimum low period of clock is 4.7 us */ + usec_delay(E1000_I2C_T_LOW); + +} + +/** + * e1000_i2c_stop - Sets I2C stop condition + * @hw: pointer to hardware structure + * + * Sets I2C stop condition (Low -> High on SDA while SCL is High) + **/ +static void e1000_i2c_stop(struct e1000_hw *hw) +{ + u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); + + DEBUGFUNC("e1000_i2c_stop"); + + /* Stop condition must begin with data low and clock high */ + e1000_set_i2c_data(hw, &i2cctl, 0); + e1000_raise_i2c_clk(hw, &i2cctl); + + /* Setup time for stop condition (4us) */ + usec_delay(E1000_I2C_T_SU_STO); + + e1000_set_i2c_data(hw, &i2cctl, 1); + + /* bus free time between stop and start (4.7us)*/ + usec_delay(E1000_I2C_T_BUF); +} + +/** + * e1000_clock_in_i2c_byte - Clocks in one byte via I2C + * @hw: pointer to hardware structure + * @data: data byte to clock in + * + * Clocks in one byte data via I2C data/clock + **/ +static s32 e1000_clock_in_i2c_byte(struct e1000_hw *hw, u8 *data) +{ + s32 i; + bool bit = 0; + + DEBUGFUNC("e1000_clock_in_i2c_byte"); + + *data = 0; + for (i = 7; i >= 0; i--) { + e1000_clock_in_i2c_bit(hw, &bit); + *data |= bit << i; + } + + return E1000_SUCCESS; +} + +/** + * e1000_clock_out_i2c_byte - Clocks out one byte via I2C + * @hw: pointer to hardware structure + * @data: data byte clocked out + * + * Clocks out one byte data via I2C data/clock + **/ +static s32 e1000_clock_out_i2c_byte(struct e1000_hw *hw, u8 data) +{ + s32 status = E1000_SUCCESS; + s32 i; + u32 i2cctl; + bool bit = 0; + + DEBUGFUNC("e1000_clock_out_i2c_byte"); + + for (i = 7; i >= 0; i--) { + bit = (data >> i) & 0x1; + status = e1000_clock_out_i2c_bit(hw, bit); + + if (status != E1000_SUCCESS) + break; + } + + /* Release SDA line (set high) */ + i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); + + i2cctl |= E1000_I2C_DATA_OE_N; + E1000_WRITE_REG(hw, E1000_I2CPARAMS, i2cctl); + E1000_WRITE_FLUSH(hw); + + return status; +} + +/** + * e1000_get_i2c_ack - Polls for I2C ACK + * @hw: pointer to hardware structure + * + * Clocks in/out one bit via I2C data/clock + **/ +static s32 e1000_get_i2c_ack(struct e1000_hw *hw) +{ + s32 status = E1000_SUCCESS; + u32 i = 0; + u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); + u32 timeout = 10; + bool ack = true; + + DEBUGFUNC("e1000_get_i2c_ack"); + + e1000_raise_i2c_clk(hw, &i2cctl); + + /* Minimum high period of clock is 4us */ + usec_delay(E1000_I2C_T_HIGH); + + /* Wait until SCL returns high */ + for (i = 0; i < timeout; i++) { + usec_delay(1); + i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); + if (i2cctl & E1000_I2C_CLK_IN) + break; + } + if (!(i2cctl & E1000_I2C_CLK_IN)) + return E1000_ERR_I2C; + + ack = e1000_get_i2c_data(&i2cctl); + if (ack) { + DEBUGOUT("I2C ack was not received.\n"); + status = E1000_ERR_I2C; + } + + e1000_lower_i2c_clk(hw, &i2cctl); + + /* Minimum low period of clock is 4.7 us */ + usec_delay(E1000_I2C_T_LOW); + + return status; +} + +/** + * e1000_clock_in_i2c_bit - Clocks in one bit via I2C data/clock + * @hw: pointer to hardware structure + * @data: read data value + * + * Clocks in one bit via I2C data/clock + **/ +static s32 e1000_clock_in_i2c_bit(struct e1000_hw *hw, bool *data) +{ + u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); + + DEBUGFUNC("e1000_clock_in_i2c_bit"); + + e1000_raise_i2c_clk(hw, &i2cctl); + + /* Minimum high period of clock is 4us */ + usec_delay(E1000_I2C_T_HIGH); + + i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); + *data = e1000_get_i2c_data(&i2cctl); + + e1000_lower_i2c_clk(hw, &i2cctl); + + /* Minimum low period of clock is 4.7 us */ + usec_delay(E1000_I2C_T_LOW); + + return E1000_SUCCESS; +} + +/** + * e1000_clock_out_i2c_bit - Clocks in/out one bit via I2C data/clock + * @hw: pointer to hardware structure + * @data: data value to write + * + * Clocks out one bit via I2C data/clock + **/ +static s32 e1000_clock_out_i2c_bit(struct e1000_hw *hw, bool data) +{ + s32 status; + u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); + + DEBUGFUNC("e1000_clock_out_i2c_bit"); + + status = e1000_set_i2c_data(hw, &i2cctl, data); + if (status == E1000_SUCCESS) { + e1000_raise_i2c_clk(hw, &i2cctl); + + /* Minimum high period of clock is 4us */ + usec_delay(E1000_I2C_T_HIGH); + + e1000_lower_i2c_clk(hw, &i2cctl); + + /* Minimum low period of clock is 4.7 us. + * This also takes care of the data hold time. + */ + usec_delay(E1000_I2C_T_LOW); + } else { + status = E1000_ERR_I2C; + DEBUGOUT1("I2C data was not set to %X\n", data); + } + + return status; +} +/** + * e1000_raise_i2c_clk - Raises the I2C SCL clock + * @hw: pointer to hardware structure + * @i2cctl: Current value of I2CCTL register + * + * Raises the I2C clock line '0'->'1' + **/ +static void e1000_raise_i2c_clk(struct e1000_hw *hw, u32 *i2cctl) +{ + DEBUGFUNC("e1000_raise_i2c_clk"); + + *i2cctl |= E1000_I2C_CLK_OUT; + *i2cctl &= ~E1000_I2C_CLK_OE_N; + E1000_WRITE_REG(hw, E1000_I2CPARAMS, *i2cctl); + E1000_WRITE_FLUSH(hw); + + /* SCL rise time (1000ns) */ + usec_delay(E1000_I2C_T_RISE); +} + +/** + * e1000_lower_i2c_clk - Lowers the I2C SCL clock + * @hw: pointer to hardware structure + * @i2cctl: Current value of I2CCTL register + * + * Lowers the I2C clock line '1'->'0' + **/ +static void e1000_lower_i2c_clk(struct e1000_hw *hw, u32 *i2cctl) +{ + + DEBUGFUNC("e1000_lower_i2c_clk"); + + *i2cctl &= ~E1000_I2C_CLK_OUT; + *i2cctl &= ~E1000_I2C_CLK_OE_N; + E1000_WRITE_REG(hw, E1000_I2CPARAMS, *i2cctl); + E1000_WRITE_FLUSH(hw); + + /* SCL fall time (300ns) */ + usec_delay(E1000_I2C_T_FALL); +} + +/** + * e1000_set_i2c_data - Sets the I2C data bit + * @hw: pointer to hardware structure + * @i2cctl: Current value of I2CCTL register + * @data: I2C data value (0 or 1) to set + * + * Sets the I2C data bit + **/ +static s32 e1000_set_i2c_data(struct e1000_hw *hw, u32 *i2cctl, bool data) +{ + s32 status = E1000_SUCCESS; + + DEBUGFUNC("e1000_set_i2c_data"); + + if (data) + *i2cctl |= E1000_I2C_DATA_OUT; + else + *i2cctl &= ~E1000_I2C_DATA_OUT; + + *i2cctl &= ~E1000_I2C_DATA_OE_N; + *i2cctl |= E1000_I2C_CLK_OE_N; + E1000_WRITE_REG(hw, E1000_I2CPARAMS, *i2cctl); + E1000_WRITE_FLUSH(hw); + + /* Data rise/fall (1000ns/300ns) and set-up time (250ns) */ + usec_delay(E1000_I2C_T_RISE + E1000_I2C_T_FALL + E1000_I2C_T_SU_DATA); + + *i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); + if (data != e1000_get_i2c_data(i2cctl)) { + status = E1000_ERR_I2C; + DEBUGOUT1("Error - I2C data was not set to %X.\n", data); + } + + return status; +} + +/** + * e1000_get_i2c_data - Reads the I2C SDA data bit + * @hw: pointer to hardware structure + * @i2cctl: Current value of I2CCTL register + * + * Returns the I2C data bit value + **/ +static bool e1000_get_i2c_data(u32 *i2cctl) +{ + bool data; + + DEBUGFUNC("e1000_get_i2c_data"); + + if (*i2cctl & E1000_I2C_DATA_IN) + data = 1; + else + data = 0; + + return data; +} + +/** + * e1000_i2c_bus_clear - Clears the I2C bus + * @hw: pointer to hardware structure + * + * Clears the I2C bus by sending nine clock pulses. + * Used when data line is stuck low. + **/ +void e1000_i2c_bus_clear(struct e1000_hw *hw) +{ + u32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); + u32 i; + + DEBUGFUNC("e1000_i2c_bus_clear"); + + e1000_i2c_start(hw); + + e1000_set_i2c_data(hw, &i2cctl, 1); + + for (i = 0; i < 9; i++) { + e1000_raise_i2c_clk(hw, &i2cctl); + + /* Min high period of clock is 4us */ + usec_delay(E1000_I2C_T_HIGH); + + e1000_lower_i2c_clk(hw, &i2cctl); + + /* Min low period of clock is 4.7us*/ + usec_delay(E1000_I2C_T_LOW); + } + + e1000_i2c_start(hw); + + /* Put the i2c bus back to default state */ + e1000_i2c_stop(hw); +} + +static const u8 e1000_emc_temp_data[4] = { + E1000_EMC_INTERNAL_DATA, + E1000_EMC_DIODE1_DATA, + E1000_EMC_DIODE2_DATA, + E1000_EMC_DIODE3_DATA +}; +static const u8 e1000_emc_therm_limit[4] = { + E1000_EMC_INTERNAL_THERM_LIMIT, + E1000_EMC_DIODE1_THERM_LIMIT, + E1000_EMC_DIODE2_THERM_LIMIT, + E1000_EMC_DIODE3_THERM_LIMIT +}; + +/** + * e1000_get_thermal_sensor_data_generic - Gathers thermal sensor data + * @hw: pointer to hardware structure + * + * Updates the temperatures in mac.thermal_sensor_data + **/ +s32 e1000_get_thermal_sensor_data_generic(struct e1000_hw *hw) +{ + s32 status = E1000_SUCCESS; + u16 ets_offset; + u16 ets_cfg; + u16 ets_sensor; + u8 num_sensors; + u8 sensor_index; + u8 sensor_location; + u8 i; + struct e1000_thermal_sensor_data *data = &hw->mac.thermal_sensor_data; + + DEBUGFUNC("e1000_get_thermal_sensor_data_generic"); + + if ((hw->mac.type != e1000_i350) || (hw->bus.func != 0)) + return E1000_NOT_IMPLEMENTED; + + data->sensor[0].temp = (E1000_READ_REG(hw, E1000_THMJT) & 0xFF); + + /* Return the internal sensor only if ETS is unsupported */ + e1000_read_nvm(hw, NVM_ETS_CFG, 1, &ets_offset); + if ((ets_offset == 0x0000) || (ets_offset == 0xFFFF)) + return status; + + e1000_read_nvm(hw, ets_offset, 1, &ets_cfg); + if (((ets_cfg & NVM_ETS_TYPE_MASK) >> NVM_ETS_TYPE_SHIFT) + != NVM_ETS_TYPE_EMC) + return E1000_NOT_IMPLEMENTED; + + num_sensors = (ets_cfg & NVM_ETS_NUM_SENSORS_MASK); + if (num_sensors > E1000_MAX_SENSORS) + num_sensors = E1000_MAX_SENSORS; + + for (i = 1; i < num_sensors; i++) { + e1000_read_nvm(hw, (ets_offset + i), 1, &ets_sensor); + sensor_index = ((ets_sensor & NVM_ETS_DATA_INDEX_MASK) >> + NVM_ETS_DATA_INDEX_SHIFT); + sensor_location = ((ets_sensor & NVM_ETS_DATA_LOC_MASK) >> + NVM_ETS_DATA_LOC_SHIFT); + + if (sensor_location != 0) + hw->phy.ops.read_i2c_byte(hw, + e1000_emc_temp_data[sensor_index], + E1000_I2C_THERMAL_SENSOR_ADDR, + &data->sensor[i].temp); + } + return status; +} + +/** + * e1000_init_thermal_sensor_thresh_generic - Sets thermal sensor thresholds + * @hw: pointer to hardware structure + * + * Sets the thermal sensor thresholds according to the NVM map + * and save off the threshold and location values into mac.thermal_sensor_data + **/ +s32 e1000_init_thermal_sensor_thresh_generic(struct e1000_hw *hw) +{ + s32 status = E1000_SUCCESS; + u16 ets_offset; + u16 ets_cfg; + u16 ets_sensor; + u8 low_thresh_delta; + u8 num_sensors; + u8 sensor_index; + u8 sensor_location; + u8 therm_limit; + u8 i; + struct e1000_thermal_sensor_data *data = &hw->mac.thermal_sensor_data; + + DEBUGFUNC("e1000_init_thermal_sensor_thresh_generic"); + + if ((hw->mac.type != e1000_i350) || (hw->bus.func != 0)) + return E1000_NOT_IMPLEMENTED; + + memset(data, 0, sizeof(struct e1000_thermal_sensor_data)); + + data->sensor[0].location = 0x1; + data->sensor[0].caution_thresh = + (E1000_READ_REG(hw, E1000_THHIGHTC) & 0xFF); + data->sensor[0].max_op_thresh = + (E1000_READ_REG(hw, E1000_THLOWTC) & 0xFF); + + /* Return the internal sensor only if ETS is unsupported */ + e1000_read_nvm(hw, NVM_ETS_CFG, 1, &ets_offset); + if ((ets_offset == 0x0000) || (ets_offset == 0xFFFF)) + return status; + + e1000_read_nvm(hw, ets_offset, 1, &ets_cfg); + if (((ets_cfg & NVM_ETS_TYPE_MASK) >> NVM_ETS_TYPE_SHIFT) + != NVM_ETS_TYPE_EMC) + return E1000_NOT_IMPLEMENTED; + + low_thresh_delta = ((ets_cfg & NVM_ETS_LTHRES_DELTA_MASK) >> + NVM_ETS_LTHRES_DELTA_SHIFT); + num_sensors = (ets_cfg & NVM_ETS_NUM_SENSORS_MASK); + + for (i = 1; i <= num_sensors; i++) { + e1000_read_nvm(hw, (ets_offset + i), 1, &ets_sensor); + sensor_index = ((ets_sensor & NVM_ETS_DATA_INDEX_MASK) >> + NVM_ETS_DATA_INDEX_SHIFT); + sensor_location = ((ets_sensor & NVM_ETS_DATA_LOC_MASK) >> + NVM_ETS_DATA_LOC_SHIFT); + therm_limit = ets_sensor & NVM_ETS_DATA_HTHRESH_MASK; + + hw->phy.ops.write_i2c_byte(hw, + e1000_emc_therm_limit[sensor_index], + E1000_I2C_THERMAL_SENSOR_ADDR, + therm_limit); + + if ((i < E1000_MAX_SENSORS) && (sensor_location != 0)) { + data->sensor[i].location = sensor_location; + data->sensor[i].caution_thresh = therm_limit; + data->sensor[i].max_op_thresh = therm_limit - + low_thresh_delta; + } + } + return status; +} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.h new file mode 100644 index 00000000..1aec75ab --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_82575.h @@ -0,0 +1,509 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _E1000_82575_H_ +#define _E1000_82575_H_ + +#define ID_LED_DEFAULT_82575_SERDES ((ID_LED_DEF1_DEF2 << 12) | \ + (ID_LED_DEF1_DEF2 << 8) | \ + (ID_LED_DEF1_DEF2 << 4) | \ + (ID_LED_OFF1_ON2)) +/* + * Receive Address Register Count + * Number of high/low register pairs in the RAR. The RAR (Receive Address + * Registers) holds the directed and multicast addresses that we monitor. + * These entries are also used for MAC-based filtering. + */ +/* + * For 82576, there are an additional set of RARs that begin at an offset + * separate from the first set of RARs. + */ +#define E1000_RAR_ENTRIES_82575 16 +#define E1000_RAR_ENTRIES_82576 24 +#define E1000_RAR_ENTRIES_82580 24 +#define E1000_RAR_ENTRIES_I350 32 +#define E1000_SW_SYNCH_MB 0x00000100 +#define E1000_STAT_DEV_RST_SET 0x00100000 +#define E1000_CTRL_DEV_RST 0x20000000 + +struct e1000_adv_data_desc { + __le64 buffer_addr; /* Address of the descriptor's data buffer */ + union { + u32 data; + struct { + u32 datalen:16; /* Data buffer length */ + u32 rsvd:4; + u32 dtyp:4; /* Descriptor type */ + u32 dcmd:8; /* Descriptor command */ + } config; + } lower; + union { + u32 data; + struct { + u32 status:4; /* Descriptor status */ + u32 idx:4; + u32 popts:6; /* Packet Options */ + u32 paylen:18; /* Payload length */ + } options; + } upper; +}; + +#define E1000_TXD_DTYP_ADV_C 0x2 /* Advanced Context Descriptor */ +#define E1000_TXD_DTYP_ADV_D 0x3 /* Advanced Data Descriptor */ +#define E1000_ADV_TXD_CMD_DEXT 0x20 /* Descriptor extension (0 = legacy) */ +#define E1000_ADV_TUCMD_IPV4 0x2 /* IP Packet Type: 1=IPv4 */ +#define E1000_ADV_TUCMD_IPV6 0x0 /* IP Packet Type: 0=IPv6 */ +#define E1000_ADV_TUCMD_L4T_UDP 0x0 /* L4 Packet TYPE of UDP */ +#define E1000_ADV_TUCMD_L4T_TCP 0x4 /* L4 Packet TYPE of TCP */ +#define E1000_ADV_TUCMD_MKRREQ 0x10 /* Indicates markers are required */ +#define E1000_ADV_DCMD_EOP 0x1 /* End of Packet */ +#define E1000_ADV_DCMD_IFCS 0x2 /* Insert FCS (Ethernet CRC) */ +#define E1000_ADV_DCMD_RS 0x8 /* Report Status */ +#define E1000_ADV_DCMD_VLE 0x40 /* Add VLAN tag */ +#define E1000_ADV_DCMD_TSE 0x80 /* TCP Seg enable */ +/* Extended Device Control */ +#define E1000_CTRL_EXT_NSICR 0x00000001 /* Disable Intr Clear all on read */ + +struct e1000_adv_context_desc { + union { + u32 ip_config; + struct { + u32 iplen:9; + u32 maclen:7; + u32 vlan_tag:16; + } fields; + } ip_setup; + u32 seq_num; + union { + u64 l4_config; + struct { + u32 mkrloc:9; + u32 tucmd:11; + u32 dtyp:4; + u32 adv:8; + u32 rsvd:4; + u32 idx:4; + u32 l4len:8; + u32 mss:16; + } fields; + } l4_setup; +}; + +/* SRRCTL bit definitions */ +#define E1000_SRRCTL_BSIZEPKT_SHIFT 10 /* Shift _right_ */ +#define E1000_SRRCTL_BSIZEHDRSIZE_MASK 0x00000F00 +#define E1000_SRRCTL_BSIZEHDRSIZE_SHIFT 2 /* Shift _left_ */ +#define E1000_SRRCTL_DESCTYPE_LEGACY 0x00000000 +#define E1000_SRRCTL_DESCTYPE_ADV_ONEBUF 0x02000000 +#define E1000_SRRCTL_DESCTYPE_HDR_SPLIT 0x04000000 +#define E1000_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS 0x0A000000 +#define E1000_SRRCTL_DESCTYPE_HDR_REPLICATION 0x06000000 +#define E1000_SRRCTL_DESCTYPE_HDR_REPLICATION_LARGE_PKT 0x08000000 +#define E1000_SRRCTL_DESCTYPE_MASK 0x0E000000 +#define E1000_SRRCTL_TIMESTAMP 0x40000000 +#define E1000_SRRCTL_DROP_EN 0x80000000 + +#define E1000_SRRCTL_BSIZEPKT_MASK 0x0000007F +#define E1000_SRRCTL_BSIZEHDR_MASK 0x00003F00 + +#define E1000_TX_HEAD_WB_ENABLE 0x1 +#define E1000_TX_SEQNUM_WB_ENABLE 0x2 + +#define E1000_MRQC_ENABLE_RSS_4Q 0x00000002 +#define E1000_MRQC_ENABLE_VMDQ 0x00000003 +#define E1000_MRQC_ENABLE_VMDQ_RSS_2Q 0x00000005 +#define E1000_MRQC_RSS_FIELD_IPV4_UDP 0x00400000 +#define E1000_MRQC_RSS_FIELD_IPV6_UDP 0x00800000 +#define E1000_MRQC_RSS_FIELD_IPV6_UDP_EX 0x01000000 +#define E1000_MRQC_ENABLE_RSS_8Q 0x00000002 + +#define E1000_VMRCTL_MIRROR_PORT_SHIFT 8 +#define E1000_VMRCTL_MIRROR_DSTPORT_MASK (7 << \ + E1000_VMRCTL_MIRROR_PORT_SHIFT) +#define E1000_VMRCTL_POOL_MIRROR_ENABLE (1 << 0) +#define E1000_VMRCTL_UPLINK_MIRROR_ENABLE (1 << 1) +#define E1000_VMRCTL_DOWNLINK_MIRROR_ENABLE (1 << 2) + +#define E1000_EICR_TX_QUEUE ( \ + E1000_EICR_TX_QUEUE0 | \ + E1000_EICR_TX_QUEUE1 | \ + E1000_EICR_TX_QUEUE2 | \ + E1000_EICR_TX_QUEUE3) + +#define E1000_EICR_RX_QUEUE ( \ + E1000_EICR_RX_QUEUE0 | \ + E1000_EICR_RX_QUEUE1 | \ + E1000_EICR_RX_QUEUE2 | \ + E1000_EICR_RX_QUEUE3) + +#define E1000_EIMS_RX_QUEUE E1000_EICR_RX_QUEUE +#define E1000_EIMS_TX_QUEUE E1000_EICR_TX_QUEUE + +#define EIMS_ENABLE_MASK ( \ + E1000_EIMS_RX_QUEUE | \ + E1000_EIMS_TX_QUEUE | \ + E1000_EIMS_TCP_TIMER | \ + E1000_EIMS_OTHER) + +/* Immediate Interrupt Rx (A.K.A. Low Latency Interrupt) */ +#define E1000_IMIR_PORT_IM_EN 0x00010000 /* TCP port enable */ +#define E1000_IMIR_PORT_BP 0x00020000 /* TCP port check bypass */ +#define E1000_IMIREXT_SIZE_BP 0x00001000 /* Packet size bypass */ +#define E1000_IMIREXT_CTRL_URG 0x00002000 /* Check URG bit in header */ +#define E1000_IMIREXT_CTRL_ACK 0x00004000 /* Check ACK bit in header */ +#define E1000_IMIREXT_CTRL_PSH 0x00008000 /* Check PSH bit in header */ +#define E1000_IMIREXT_CTRL_RST 0x00010000 /* Check RST bit in header */ +#define E1000_IMIREXT_CTRL_SYN 0x00020000 /* Check SYN bit in header */ +#define E1000_IMIREXT_CTRL_FIN 0x00040000 /* Check FIN bit in header */ +#define E1000_IMIREXT_CTRL_BP 0x00080000 /* Bypass check of ctrl bits */ + +/* Receive Descriptor - Advanced */ +union e1000_adv_rx_desc { + struct { + __le64 pkt_addr; /* Packet buffer address */ + __le64 hdr_addr; /* Header buffer address */ + } read; + struct { + struct { + union { + __le32 data; + struct { + __le16 pkt_info; /*RSS type, Pkt type*/ + /* Split Header, header buffer len */ + __le16 hdr_info; + } hs_rss; + } lo_dword; + union { + __le32 rss; /* RSS Hash */ + struct { + __le16 ip_id; /* IP id */ + __le16 csum; /* Packet Checksum */ + } csum_ip; + } hi_dword; + } lower; + struct { + __le32 status_error; /* ext status/error */ + __le16 length; /* Packet length */ + __le16 vlan; /* VLAN tag */ + } upper; + } wb; /* writeback */ +}; + +#define E1000_RXDADV_RSSTYPE_MASK 0x0000000F +#define E1000_RXDADV_RSSTYPE_SHIFT 12 +#define E1000_RXDADV_HDRBUFLEN_MASK 0x7FE0 +#define E1000_RXDADV_HDRBUFLEN_SHIFT 5 +#define E1000_RXDADV_SPLITHEADER_EN 0x00001000 +#define E1000_RXDADV_SPH 0x8000 +#define E1000_RXDADV_STAT_TS 0x10000 /* Pkt was time stamped */ +#define E1000_RXDADV_STAT_TSIP 0x08000 /* timestamp in packet */ +#define E1000_RXDADV_ERR_HBO 0x00800000 + +/* RSS Hash results */ +#define E1000_RXDADV_RSSTYPE_NONE 0x00000000 +#define E1000_RXDADV_RSSTYPE_IPV4_TCP 0x00000001 +#define E1000_RXDADV_RSSTYPE_IPV4 0x00000002 +#define E1000_RXDADV_RSSTYPE_IPV6_TCP 0x00000003 +#define E1000_RXDADV_RSSTYPE_IPV6_EX 0x00000004 +#define E1000_RXDADV_RSSTYPE_IPV6 0x00000005 +#define E1000_RXDADV_RSSTYPE_IPV6_TCP_EX 0x00000006 +#define E1000_RXDADV_RSSTYPE_IPV4_UDP 0x00000007 +#define E1000_RXDADV_RSSTYPE_IPV6_UDP 0x00000008 +#define E1000_RXDADV_RSSTYPE_IPV6_UDP_EX 0x00000009 + +/* RSS Packet Types as indicated in the receive descriptor */ +#define E1000_RXDADV_PKTTYPE_NONE 0x00000000 +#define E1000_RXDADV_PKTTYPE_IPV4 0x00000010 /* IPV4 hdr present */ +#define E1000_RXDADV_PKTTYPE_IPV4_EX 0x00000020 /* IPV4 hdr + extensions */ +#define E1000_RXDADV_PKTTYPE_IPV6 0x00000040 /* IPV6 hdr present */ +#define E1000_RXDADV_PKTTYPE_IPV6_EX 0x00000080 /* IPV6 hdr + extensions */ +#define E1000_RXDADV_PKTTYPE_TCP 0x00000100 /* TCP hdr present */ +#define E1000_RXDADV_PKTTYPE_UDP 0x00000200 /* UDP hdr present */ +#define E1000_RXDADV_PKTTYPE_SCTP 0x00000400 /* SCTP hdr present */ +#define E1000_RXDADV_PKTTYPE_NFS 0x00000800 /* NFS hdr present */ + +#define E1000_RXDADV_PKTTYPE_IPSEC_ESP 0x00001000 /* IPSec ESP */ +#define E1000_RXDADV_PKTTYPE_IPSEC_AH 0x00002000 /* IPSec AH */ +#define E1000_RXDADV_PKTTYPE_LINKSEC 0x00004000 /* LinkSec Encap */ +#define E1000_RXDADV_PKTTYPE_ETQF 0x00008000 /* PKTTYPE is ETQF index */ +#define E1000_RXDADV_PKTTYPE_ETQF_MASK 0x00000070 /* ETQF has 8 indices */ +#define E1000_RXDADV_PKTTYPE_ETQF_SHIFT 4 /* Right-shift 4 bits */ + +/* LinkSec results */ +/* Security Processing bit Indication */ +#define E1000_RXDADV_LNKSEC_STATUS_SECP 0x00020000 +#define E1000_RXDADV_LNKSEC_ERROR_BIT_MASK 0x18000000 +#define E1000_RXDADV_LNKSEC_ERROR_NO_SA_MATCH 0x08000000 +#define E1000_RXDADV_LNKSEC_ERROR_REPLAY_ERROR 0x10000000 +#define E1000_RXDADV_LNKSEC_ERROR_BAD_SIG 0x18000000 + +#define E1000_RXDADV_IPSEC_STATUS_SECP 0x00020000 +#define E1000_RXDADV_IPSEC_ERROR_BIT_MASK 0x18000000 +#define E1000_RXDADV_IPSEC_ERROR_INVALID_PROTOCOL 0x08000000 +#define E1000_RXDADV_IPSEC_ERROR_INVALID_LENGTH 0x10000000 +#define E1000_RXDADV_IPSEC_ERROR_AUTHENTICATION_FAILED 0x18000000 + +/* Transmit Descriptor - Advanced */ +union e1000_adv_tx_desc { + struct { + __le64 buffer_addr; /* Address of descriptor's data buf */ + __le32 cmd_type_len; + __le32 olinfo_status; + } read; + struct { + __le64 rsvd; /* Reserved */ + __le32 nxtseq_seed; + __le32 status; + } wb; +}; + +/* Adv Transmit Descriptor Config Masks */ +#define E1000_ADVTXD_DTYP_CTXT 0x00200000 /* Advanced Context Descriptor */ +#define E1000_ADVTXD_DTYP_DATA 0x00300000 /* Advanced Data Descriptor */ +#define E1000_ADVTXD_DCMD_EOP 0x01000000 /* End of Packet */ +#define E1000_ADVTXD_DCMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */ +#define E1000_ADVTXD_DCMD_RS 0x08000000 /* Report Status */ +#define E1000_ADVTXD_DCMD_DDTYP_ISCSI 0x10000000 /* DDP hdr type or iSCSI */ +#define E1000_ADVTXD_DCMD_DEXT 0x20000000 /* Descriptor extension (1=Adv) */ +#define E1000_ADVTXD_DCMD_VLE 0x40000000 /* VLAN pkt enable */ +#define E1000_ADVTXD_DCMD_TSE 0x80000000 /* TCP Seg enable */ +#define E1000_ADVTXD_MAC_LINKSEC 0x00040000 /* Apply LinkSec on pkt */ +#define E1000_ADVTXD_MAC_TSTAMP 0x00080000 /* IEEE1588 Timestamp pkt */ +#define E1000_ADVTXD_STAT_SN_CRC 0x00000002 /* NXTSEQ/SEED prsnt in WB */ +#define E1000_ADVTXD_IDX_SHIFT 4 /* Adv desc Index shift */ +#define E1000_ADVTXD_POPTS_ISCO_1ST 0x00000000 /* 1st TSO of iSCSI PDU */ +#define E1000_ADVTXD_POPTS_ISCO_MDL 0x00000800 /* Middle TSO of iSCSI PDU */ +#define E1000_ADVTXD_POPTS_ISCO_LAST 0x00001000 /* Last TSO of iSCSI PDU */ +/* 1st & Last TSO-full iSCSI PDU*/ +#define E1000_ADVTXD_POPTS_ISCO_FULL 0x00001800 +#define E1000_ADVTXD_POPTS_IPSEC 0x00000400 /* IPSec offload request */ +#define E1000_ADVTXD_PAYLEN_SHIFT 14 /* Adv desc PAYLEN shift */ + +/* Context descriptors */ +struct e1000_adv_tx_context_desc { + __le32 vlan_macip_lens; + __le32 seqnum_seed; + __le32 type_tucmd_mlhl; + __le32 mss_l4len_idx; +}; + +#define E1000_ADVTXD_MACLEN_SHIFT 9 /* Adv ctxt desc mac len shift */ +#define E1000_ADVTXD_VLAN_SHIFT 16 /* Adv ctxt vlan tag shift */ +#define E1000_ADVTXD_TUCMD_IPV4 0x00000400 /* IP Packet Type: 1=IPv4 */ +#define E1000_ADVTXD_TUCMD_IPV6 0x00000000 /* IP Packet Type: 0=IPv6 */ +#define E1000_ADVTXD_TUCMD_L4T_UDP 0x00000000 /* L4 Packet TYPE of UDP */ +#define E1000_ADVTXD_TUCMD_L4T_TCP 0x00000800 /* L4 Packet TYPE of TCP */ +#define E1000_ADVTXD_TUCMD_L4T_SCTP 0x00001000 /* L4 Packet TYPE of SCTP */ +#define E1000_ADVTXD_TUCMD_IPSEC_TYPE_ESP 0x00002000 /* IPSec Type ESP */ +/* IPSec Encrypt Enable for ESP */ +#define E1000_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN 0x00004000 +/* Req requires Markers and CRC */ +#define E1000_ADVTXD_TUCMD_MKRREQ 0x00002000 +#define E1000_ADVTXD_L4LEN_SHIFT 8 /* Adv ctxt L4LEN shift */ +#define E1000_ADVTXD_MSS_SHIFT 16 /* Adv ctxt MSS shift */ +/* Adv ctxt IPSec SA IDX mask */ +#define E1000_ADVTXD_IPSEC_SA_INDEX_MASK 0x000000FF +/* Adv ctxt IPSec ESP len mask */ +#define E1000_ADVTXD_IPSEC_ESP_LEN_MASK 0x000000FF + +/* Additional Transmit Descriptor Control definitions */ +#define E1000_TXDCTL_QUEUE_ENABLE 0x02000000 /* Ena specific Tx Queue */ +#define E1000_TXDCTL_SWFLSH 0x04000000 /* Tx Desc. wbk flushing */ +/* Tx Queue Arbitration Priority 0=low, 1=high */ +#define E1000_TXDCTL_PRIORITY 0x08000000 + +/* Additional Receive Descriptor Control definitions */ +#define E1000_RXDCTL_QUEUE_ENABLE 0x02000000 /* Ena specific Rx Queue */ +#define E1000_RXDCTL_SWFLSH 0x04000000 /* Rx Desc. wbk flushing */ + +/* Direct Cache Access (DCA) definitions */ +#define E1000_DCA_CTRL_DCA_ENABLE 0x00000000 /* DCA Enable */ +#define E1000_DCA_CTRL_DCA_DISABLE 0x00000001 /* DCA Disable */ + +#define E1000_DCA_CTRL_DCA_MODE_CB1 0x00 /* DCA Mode CB1 */ +#define E1000_DCA_CTRL_DCA_MODE_CB2 0x02 /* DCA Mode CB2 */ + +#define E1000_DCA_RXCTRL_CPUID_MASK 0x0000001F /* Rx CPUID Mask */ +#define E1000_DCA_RXCTRL_DESC_DCA_EN (1 << 5) /* DCA Rx Desc enable */ +#define E1000_DCA_RXCTRL_HEAD_DCA_EN (1 << 6) /* DCA Rx Desc header ena */ +#define E1000_DCA_RXCTRL_DATA_DCA_EN (1 << 7) /* DCA Rx Desc payload ena */ +#define E1000_DCA_RXCTRL_DESC_RRO_EN (1 << 9) /* DCA Rx Desc Relax Order */ + +#define E1000_DCA_TXCTRL_CPUID_MASK 0x0000001F /* Tx CPUID Mask */ +#define E1000_DCA_TXCTRL_DESC_DCA_EN (1 << 5) /* DCA Tx Desc enable */ +#define E1000_DCA_TXCTRL_DESC_RRO_EN (1 << 9) /* Tx rd Desc Relax Order */ +#define E1000_DCA_TXCTRL_TX_WB_RO_EN (1 << 11) /* Tx Desc writeback RO bit */ +#define E1000_DCA_TXCTRL_DATA_RRO_EN (1 << 13) /* Tx rd data Relax Order */ + +#define E1000_DCA_TXCTRL_CPUID_MASK_82576 0xFF000000 /* Tx CPUID Mask */ +#define E1000_DCA_RXCTRL_CPUID_MASK_82576 0xFF000000 /* Rx CPUID Mask */ +#define E1000_DCA_TXCTRL_CPUID_SHIFT_82576 24 /* Tx CPUID */ +#define E1000_DCA_RXCTRL_CPUID_SHIFT_82576 24 /* Rx CPUID */ + +/* Additional interrupt register bit definitions */ +#define E1000_ICR_LSECPNS 0x00000020 /* PN threshold - server */ +#define E1000_IMS_LSECPNS E1000_ICR_LSECPNS /* PN threshold - server */ +#define E1000_ICS_LSECPNS E1000_ICR_LSECPNS /* PN threshold - server */ + +/* ETQF register bit definitions */ +#define E1000_ETQF_FILTER_ENABLE (1 << 26) +#define E1000_ETQF_IMM_INT (1 << 29) +#define E1000_ETQF_1588 (1 << 30) +#define E1000_ETQF_QUEUE_ENABLE (1 << 31) +/* + * ETQF filter list: one static filter per filter consumer. This is + * to avoid filter collisions later. Add new filters + * here!! + * + * Current filters: + * EAPOL 802.1x (0x888e): Filter 0 + */ +#define E1000_ETQF_FILTER_EAPOL 0 + +#define E1000_FTQF_VF_BP 0x00008000 +#define E1000_FTQF_1588_TIME_STAMP 0x08000000 +#define E1000_FTQF_MASK 0xF0000000 +#define E1000_FTQF_MASK_PROTO_BP 0x10000000 +#define E1000_FTQF_MASK_SOURCE_ADDR_BP 0x20000000 +#define E1000_FTQF_MASK_DEST_ADDR_BP 0x40000000 +#define E1000_FTQF_MASK_SOURCE_PORT_BP 0x80000000 + +#define E1000_NVM_APME_82575 0x0400 +#define MAX_NUM_VFS 7 + +#define E1000_DTXSWC_MAC_SPOOF_MASK 0x000000FF /* Per VF MAC spoof cntrl */ +#define E1000_DTXSWC_VLAN_SPOOF_MASK 0x0000FF00 /* Per VF VLAN spoof cntrl */ +#define E1000_DTXSWC_LLE_MASK 0x00FF0000 /* Per VF Local LB enables */ +#define E1000_DTXSWC_VLAN_SPOOF_SHIFT 8 +#define E1000_DTXSWC_LLE_SHIFT 16 +#define E1000_DTXSWC_VMDQ_LOOPBACK_EN (1 << 31) /* global VF LB enable */ + +/* Easy defines for setting default pool, would normally be left a zero */ +#define E1000_VT_CTL_DEFAULT_POOL_SHIFT 7 +#define E1000_VT_CTL_DEFAULT_POOL_MASK (0x7 << E1000_VT_CTL_DEFAULT_POOL_SHIFT) + +/* Other useful VMD_CTL register defines */ +#define E1000_VT_CTL_IGNORE_MAC (1 << 28) +#define E1000_VT_CTL_DISABLE_DEF_POOL (1 << 29) +#define E1000_VT_CTL_VM_REPL_EN (1 << 30) + +/* Per VM Offload register setup */ +#define E1000_VMOLR_RLPML_MASK 0x00003FFF /* Long Packet Maximum Length mask */ +#define E1000_VMOLR_LPE 0x00010000 /* Accept Long packet */ +#define E1000_VMOLR_RSSE 0x00020000 /* Enable RSS */ +#define E1000_VMOLR_AUPE 0x01000000 /* Accept untagged packets */ +#define E1000_VMOLR_ROMPE 0x02000000 /* Accept overflow multicast */ +#define E1000_VMOLR_ROPE 0x04000000 /* Accept overflow unicast */ +#define E1000_VMOLR_BAM 0x08000000 /* Accept Broadcast packets */ +#define E1000_VMOLR_MPME 0x10000000 /* Multicast promiscuous mode */ +#define E1000_VMOLR_STRVLAN 0x40000000 /* Vlan stripping enable */ +#define E1000_VMOLR_STRCRC 0x80000000 /* CRC stripping enable */ + +#define E1000_VMOLR_VPE 0x00800000 /* VLAN promiscuous enable */ +#define E1000_VMOLR_UPE 0x20000000 /* Unicast promisuous enable */ +#define E1000_DVMOLR_HIDVLAN 0x20000000 /* Vlan hiding enable */ +#define E1000_DVMOLR_STRVLAN 0x40000000 /* Vlan stripping enable */ +#define E1000_DVMOLR_STRCRC 0x80000000 /* CRC stripping enable */ + +#define E1000_PBRWAC_WALPB 0x00000007 /* Wrap around event on LAN Rx PB */ +#define E1000_PBRWAC_PBE 0x00000008 /* Rx packet buffer empty */ + +#define E1000_VLVF_ARRAY_SIZE 32 +#define E1000_VLVF_VLANID_MASK 0x00000FFF +#define E1000_VLVF_POOLSEL_SHIFT 12 +#define E1000_VLVF_POOLSEL_MASK (0xFF << E1000_VLVF_POOLSEL_SHIFT) +#define E1000_VLVF_LVLAN 0x00100000 +#define E1000_VLVF_VLANID_ENABLE 0x80000000 + +#define E1000_VMVIR_VLANA_DEFAULT 0x40000000 /* Always use default VLAN */ +#define E1000_VMVIR_VLANA_NEVER 0x80000000 /* Never insert VLAN tag */ + +#define E1000_VF_INIT_TIMEOUT 200 /* Number of retries to clear RSTI */ + +#define E1000_IOVCTL 0x05BBC +#define E1000_IOVCTL_REUSE_VFQ 0x00000001 + +#define E1000_RPLOLR_STRVLAN 0x40000000 +#define E1000_RPLOLR_STRCRC 0x80000000 + +#define E1000_TCTL_EXT_COLD 0x000FFC00 +#define E1000_TCTL_EXT_COLD_SHIFT 10 + +#define E1000_DTXCTL_8023LL 0x0004 +#define E1000_DTXCTL_VLAN_ADDED 0x0008 +#define E1000_DTXCTL_OOS_ENABLE 0x0010 +#define E1000_DTXCTL_MDP_EN 0x0020 +#define E1000_DTXCTL_SPOOF_INT 0x0040 + +#define E1000_EEPROM_PCS_AUTONEG_DISABLE_BIT (1 << 14) + +#define ALL_QUEUES 0xFFFF + +/* Rx packet buffer size defines */ +#define E1000_RXPBS_SIZE_MASK_82576 0x0000007F +void e1000_vmdq_set_loopback_pf(struct e1000_hw *hw, bool enable); +void e1000_vmdq_set_anti_spoofing_pf(struct e1000_hw *hw, bool enable, int pf); +void e1000_vmdq_set_replication_pf(struct e1000_hw *hw, bool enable); +s32 e1000_init_nvm_params_82575(struct e1000_hw *hw); + +u16 e1000_rxpbs_adjust_82580(u32 data); +s32 e1000_read_emi_reg(struct e1000_hw *hw, u16 addr, u16 *data); +s32 e1000_set_eee_i350(struct e1000_hw *); +s32 e1000_set_eee_i354(struct e1000_hw *); +s32 e1000_get_eee_status_i354(struct e1000_hw *, bool *); +#define E1000_I2C_THERMAL_SENSOR_ADDR 0xF8 +#define E1000_EMC_INTERNAL_DATA 0x00 +#define E1000_EMC_INTERNAL_THERM_LIMIT 0x20 +#define E1000_EMC_DIODE1_DATA 0x01 +#define E1000_EMC_DIODE1_THERM_LIMIT 0x19 +#define E1000_EMC_DIODE2_DATA 0x23 +#define E1000_EMC_DIODE2_THERM_LIMIT 0x1A +#define E1000_EMC_DIODE3_DATA 0x2A +#define E1000_EMC_DIODE3_THERM_LIMIT 0x30 + +s32 e1000_get_thermal_sensor_data_generic(struct e1000_hw *hw); +s32 e1000_init_thermal_sensor_thresh_generic(struct e1000_hw *hw); + +/* I2C SDA and SCL timing parameters for standard mode */ +#define E1000_I2C_T_HD_STA 4 +#define E1000_I2C_T_LOW 5 +#define E1000_I2C_T_HIGH 4 +#define E1000_I2C_T_SU_STA 5 +#define E1000_I2C_T_HD_DATA 5 +#define E1000_I2C_T_SU_DATA 1 +#define E1000_I2C_T_RISE 1 +#define E1000_I2C_T_FALL 1 +#define E1000_I2C_T_SU_STO 4 +#define E1000_I2C_T_BUF 5 + +s32 e1000_set_i2c_bb(struct e1000_hw *hw); +s32 e1000_read_i2c_byte_generic(struct e1000_hw *hw, u8 byte_offset, + u8 dev_addr, u8 *data); +s32 e1000_write_i2c_byte_generic(struct e1000_hw *hw, u8 byte_offset, + u8 dev_addr, u8 data); +void e1000_i2c_bus_clear(struct e1000_hw *hw); +#endif /* _E1000_82575_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.c new file mode 100644 index 00000000..6095d3b4 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.c @@ -0,0 +1,1159 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#include "e1000_api.h" + +/** + * e1000_init_mac_params - Initialize MAC function pointers + * @hw: pointer to the HW structure + * + * This function initializes the function pointers for the MAC + * set of functions. Called by drivers or by e1000_setup_init_funcs. + **/ +s32 e1000_init_mac_params(struct e1000_hw *hw) +{ + s32 ret_val = E1000_SUCCESS; + + if (hw->mac.ops.init_params) { + ret_val = hw->mac.ops.init_params(hw); + if (ret_val) { + DEBUGOUT("MAC Initialization Error\n"); + goto out; + } + } else { + DEBUGOUT("mac.init_mac_params was NULL\n"); + ret_val = -E1000_ERR_CONFIG; + } + +out: + return ret_val; +} + +/** + * e1000_init_nvm_params - Initialize NVM function pointers + * @hw: pointer to the HW structure + * + * This function initializes the function pointers for the NVM + * set of functions. Called by drivers or by e1000_setup_init_funcs. + **/ +s32 e1000_init_nvm_params(struct e1000_hw *hw) +{ + s32 ret_val = E1000_SUCCESS; + + if (hw->nvm.ops.init_params) { + ret_val = hw->nvm.ops.init_params(hw); + if (ret_val) { + DEBUGOUT("NVM Initialization Error\n"); + goto out; + } + } else { + DEBUGOUT("nvm.init_nvm_params was NULL\n"); + ret_val = -E1000_ERR_CONFIG; + } + +out: + return ret_val; +} + +/** + * e1000_init_phy_params - Initialize PHY function pointers + * @hw: pointer to the HW structure + * + * This function initializes the function pointers for the PHY + * set of functions. Called by drivers or by e1000_setup_init_funcs. + **/ +s32 e1000_init_phy_params(struct e1000_hw *hw) +{ + s32 ret_val = E1000_SUCCESS; + + if (hw->phy.ops.init_params) { + ret_val = hw->phy.ops.init_params(hw); + if (ret_val) { + DEBUGOUT("PHY Initialization Error\n"); + goto out; + } + } else { + DEBUGOUT("phy.init_phy_params was NULL\n"); + ret_val = -E1000_ERR_CONFIG; + } + +out: + return ret_val; +} + +/** + * e1000_init_mbx_params - Initialize mailbox function pointers + * @hw: pointer to the HW structure + * + * This function initializes the function pointers for the PHY + * set of functions. Called by drivers or by e1000_setup_init_funcs. + **/ +s32 e1000_init_mbx_params(struct e1000_hw *hw) +{ + s32 ret_val = E1000_SUCCESS; + + if (hw->mbx.ops.init_params) { + ret_val = hw->mbx.ops.init_params(hw); + if (ret_val) { + DEBUGOUT("Mailbox Initialization Error\n"); + goto out; + } + } else { + DEBUGOUT("mbx.init_mbx_params was NULL\n"); + ret_val = -E1000_ERR_CONFIG; + } + +out: + return ret_val; +} + +/** + * e1000_set_mac_type - Sets MAC type + * @hw: pointer to the HW structure + * + * This function sets the mac type of the adapter based on the + * device ID stored in the hw structure. + * MUST BE FIRST FUNCTION CALLED (explicitly or through + * e1000_setup_init_funcs()). + **/ +s32 e1000_set_mac_type(struct e1000_hw *hw) +{ + struct e1000_mac_info *mac = &hw->mac; + s32 ret_val = E1000_SUCCESS; + + DEBUGFUNC("e1000_set_mac_type"); + + switch (hw->device_id) { + case E1000_DEV_ID_82575EB_COPPER: + case E1000_DEV_ID_82575EB_FIBER_SERDES: + case E1000_DEV_ID_82575GB_QUAD_COPPER: + mac->type = e1000_82575; + break; + case E1000_DEV_ID_82576: + case E1000_DEV_ID_82576_FIBER: + case E1000_DEV_ID_82576_SERDES: + case E1000_DEV_ID_82576_QUAD_COPPER: + case E1000_DEV_ID_82576_QUAD_COPPER_ET2: + case E1000_DEV_ID_82576_NS: + case E1000_DEV_ID_82576_NS_SERDES: + case E1000_DEV_ID_82576_SERDES_QUAD: + mac->type = e1000_82576; + break; + case E1000_DEV_ID_82580_COPPER: + case E1000_DEV_ID_82580_FIBER: + case E1000_DEV_ID_82580_SERDES: + case E1000_DEV_ID_82580_SGMII: + case E1000_DEV_ID_82580_COPPER_DUAL: + case E1000_DEV_ID_82580_QUAD_FIBER: + case E1000_DEV_ID_DH89XXCC_SGMII: + case E1000_DEV_ID_DH89XXCC_SERDES: + case E1000_DEV_ID_DH89XXCC_BACKPLANE: + case E1000_DEV_ID_DH89XXCC_SFP: + mac->type = e1000_82580; + break; + case E1000_DEV_ID_I350_COPPER: + case E1000_DEV_ID_I350_FIBER: + case E1000_DEV_ID_I350_SERDES: + case E1000_DEV_ID_I350_SGMII: + case E1000_DEV_ID_I350_DA4: + mac->type = e1000_i350; + break; + case E1000_DEV_ID_I210_COPPER_FLASHLESS: + case E1000_DEV_ID_I210_SERDES_FLASHLESS: + case E1000_DEV_ID_I210_COPPER: + case E1000_DEV_ID_I210_COPPER_OEM1: + case E1000_DEV_ID_I210_COPPER_IT: + case E1000_DEV_ID_I210_FIBER: + case E1000_DEV_ID_I210_SERDES: + case E1000_DEV_ID_I210_SGMII: + mac->type = e1000_i210; + break; + case E1000_DEV_ID_I211_COPPER: + mac->type = e1000_i211; + break; + + case E1000_DEV_ID_I354_BACKPLANE_1GBPS: + case E1000_DEV_ID_I354_SGMII: + case E1000_DEV_ID_I354_BACKPLANE_2_5GBPS: + mac->type = e1000_i354; + break; + default: + /* Should never have loaded on this device */ + ret_val = -E1000_ERR_MAC_INIT; + break; + } + + return ret_val; +} + +/** + * e1000_setup_init_funcs - Initializes function pointers + * @hw: pointer to the HW structure + * @init_device: true will initialize the rest of the function pointers + * getting the device ready for use. false will only set + * MAC type and the function pointers for the other init + * functions. Passing false will not generate any hardware + * reads or writes. + * + * This function must be called by a driver in order to use the rest + * of the 'shared' code files. Called by drivers only. + **/ +s32 e1000_setup_init_funcs(struct e1000_hw *hw, bool init_device) +{ + s32 ret_val; + + /* Can't do much good without knowing the MAC type. */ + ret_val = e1000_set_mac_type(hw); + if (ret_val) { + DEBUGOUT("ERROR: MAC type could not be set properly.\n"); + goto out; + } + + if (!hw->hw_addr) { + DEBUGOUT("ERROR: Registers not mapped\n"); + ret_val = -E1000_ERR_CONFIG; + goto out; + } + + /* + * Init function pointers to generic implementations. We do this first + * allowing a driver module to override it afterward. + */ + e1000_init_mac_ops_generic(hw); + e1000_init_phy_ops_generic(hw); + e1000_init_nvm_ops_generic(hw); + e1000_init_mbx_ops_generic(hw); + + /* + * Set up the init function pointers. These are functions within the + * adapter family file that sets up function pointers for the rest of + * the functions in that family. + */ + switch (hw->mac.type) { + case e1000_82575: + case e1000_82576: + case e1000_82580: + case e1000_i350: + case e1000_i354: + e1000_init_function_pointers_82575(hw); + break; + case e1000_i210: + case e1000_i211: + e1000_init_function_pointers_i210(hw); + break; + default: + DEBUGOUT("Hardware not supported\n"); + ret_val = -E1000_ERR_CONFIG; + break; + } + + /* + * Initialize the rest of the function pointers. These require some + * register reads/writes in some cases. + */ + if (!(ret_val) && init_device) { + ret_val = e1000_init_mac_params(hw); + if (ret_val) + goto out; + + ret_val = e1000_init_nvm_params(hw); + if (ret_val) + goto out; + + ret_val = e1000_init_phy_params(hw); + if (ret_val) + goto out; + + ret_val = e1000_init_mbx_params(hw); + if (ret_val) + goto out; + } + +out: + return ret_val; +} + +/** + * e1000_get_bus_info - Obtain bus information for adapter + * @hw: pointer to the HW structure + * + * This will obtain information about the HW bus for which the + * adapter is attached and stores it in the hw structure. This is a + * function pointer entry point called by drivers. + **/ +s32 e1000_get_bus_info(struct e1000_hw *hw) +{ + if (hw->mac.ops.get_bus_info) + return hw->mac.ops.get_bus_info(hw); + + return E1000_SUCCESS; +} + +/** + * e1000_clear_vfta - Clear VLAN filter table + * @hw: pointer to the HW structure + * + * This clears the VLAN filter table on the adapter. This is a function + * pointer entry point called by drivers. + **/ +void e1000_clear_vfta(struct e1000_hw *hw) +{ + if (hw->mac.ops.clear_vfta) + hw->mac.ops.clear_vfta(hw); +} + +/** + * e1000_write_vfta - Write value to VLAN filter table + * @hw: pointer to the HW structure + * @offset: the 32-bit offset in which to write the value to. + * @value: the 32-bit value to write at location offset. + * + * This writes a 32-bit value to a 32-bit offset in the VLAN filter + * table. This is a function pointer entry point called by drivers. + **/ +void e1000_write_vfta(struct e1000_hw *hw, u32 offset, u32 value) +{ + if (hw->mac.ops.write_vfta) + hw->mac.ops.write_vfta(hw, offset, value); +} + +/** + * e1000_update_mc_addr_list - Update Multicast addresses + * @hw: pointer to the HW structure + * @mc_addr_list: array of multicast addresses to program + * @mc_addr_count: number of multicast addresses to program + * + * Updates the Multicast Table Array. + * The caller must have a packed mc_addr_list of multicast addresses. + **/ +void e1000_update_mc_addr_list(struct e1000_hw *hw, u8 *mc_addr_list, + u32 mc_addr_count) +{ + if (hw->mac.ops.update_mc_addr_list) + hw->mac.ops.update_mc_addr_list(hw, mc_addr_list, + mc_addr_count); +} + +/** + * e1000_force_mac_fc - Force MAC flow control + * @hw: pointer to the HW structure + * + * Force the MAC's flow control settings. Currently no func pointer exists + * and all implementations are handled in the generic version of this + * function. + **/ +s32 e1000_force_mac_fc(struct e1000_hw *hw) +{ + return e1000_force_mac_fc_generic(hw); +} + +/** + * e1000_check_for_link - Check/Store link connection + * @hw: pointer to the HW structure + * + * This checks the link condition of the adapter and stores the + * results in the hw->mac structure. This is a function pointer entry + * point called by drivers. + **/ +s32 e1000_check_for_link(struct e1000_hw *hw) +{ + if (hw->mac.ops.check_for_link) + return hw->mac.ops.check_for_link(hw); + + return -E1000_ERR_CONFIG; +} + +/** + * e1000_check_mng_mode - Check management mode + * @hw: pointer to the HW structure + * + * This checks if the adapter has manageability enabled. + * This is a function pointer entry point called by drivers. + **/ +bool e1000_check_mng_mode(struct e1000_hw *hw) +{ + if (hw->mac.ops.check_mng_mode) + return hw->mac.ops.check_mng_mode(hw); + + return false; +} + +/** + * e1000_mng_write_dhcp_info - Writes DHCP info to host interface + * @hw: pointer to the HW structure + * @buffer: pointer to the host interface + * @length: size of the buffer + * + * Writes the DHCP information to the host interface. + **/ +s32 e1000_mng_write_dhcp_info(struct e1000_hw *hw, u8 *buffer, u16 length) +{ + return e1000_mng_write_dhcp_info_generic(hw, buffer, length); +} + +/** + * e1000_reset_hw - Reset hardware + * @hw: pointer to the HW structure + * + * This resets the hardware into a known state. This is a function pointer + * entry point called by drivers. + **/ +s32 e1000_reset_hw(struct e1000_hw *hw) +{ + if (hw->mac.ops.reset_hw) + return hw->mac.ops.reset_hw(hw); + + return -E1000_ERR_CONFIG; +} + +/** + * e1000_init_hw - Initialize hardware + * @hw: pointer to the HW structure + * + * This inits the hardware readying it for operation. This is a function + * pointer entry point called by drivers. + **/ +s32 e1000_init_hw(struct e1000_hw *hw) +{ + if (hw->mac.ops.init_hw) + return hw->mac.ops.init_hw(hw); + + return -E1000_ERR_CONFIG; +} + +/** + * e1000_setup_link - Configures link and flow control + * @hw: pointer to the HW structure + * + * This configures link and flow control settings for the adapter. This + * is a function pointer entry point called by drivers. While modules can + * also call this, they probably call their own version of this function. + **/ +s32 e1000_setup_link(struct e1000_hw *hw) +{ + if (hw->mac.ops.setup_link) + return hw->mac.ops.setup_link(hw); + + return -E1000_ERR_CONFIG; +} + +/** + * e1000_get_speed_and_duplex - Returns current speed and duplex + * @hw: pointer to the HW structure + * @speed: pointer to a 16-bit value to store the speed + * @duplex: pointer to a 16-bit value to store the duplex. + * + * This returns the speed and duplex of the adapter in the two 'out' + * variables passed in. This is a function pointer entry point called + * by drivers. + **/ +s32 e1000_get_speed_and_duplex(struct e1000_hw *hw, u16 *speed, u16 *duplex) +{ + if (hw->mac.ops.get_link_up_info) + return hw->mac.ops.get_link_up_info(hw, speed, duplex); + + return -E1000_ERR_CONFIG; +} + +/** + * e1000_setup_led - Configures SW controllable LED + * @hw: pointer to the HW structure + * + * This prepares the SW controllable LED for use and saves the current state + * of the LED so it can be later restored. This is a function pointer entry + * point called by drivers. + **/ +s32 e1000_setup_led(struct e1000_hw *hw) +{ + if (hw->mac.ops.setup_led) + return hw->mac.ops.setup_led(hw); + + return E1000_SUCCESS; +} + +/** + * e1000_cleanup_led - Restores SW controllable LED + * @hw: pointer to the HW structure + * + * This restores the SW controllable LED to the value saved off by + * e1000_setup_led. This is a function pointer entry point called by drivers. + **/ +s32 e1000_cleanup_led(struct e1000_hw *hw) +{ + if (hw->mac.ops.cleanup_led) + return hw->mac.ops.cleanup_led(hw); + + return E1000_SUCCESS; +} + +/** + * e1000_blink_led - Blink SW controllable LED + * @hw: pointer to the HW structure + * + * This starts the adapter LED blinking. Request the LED to be setup first + * and cleaned up after. This is a function pointer entry point called by + * drivers. + **/ +s32 e1000_blink_led(struct e1000_hw *hw) +{ + if (hw->mac.ops.blink_led) + return hw->mac.ops.blink_led(hw); + + return E1000_SUCCESS; +} + +/** + * e1000_id_led_init - store LED configurations in SW + * @hw: pointer to the HW structure + * + * Initializes the LED config in SW. This is a function pointer entry point + * called by drivers. + **/ +s32 e1000_id_led_init(struct e1000_hw *hw) +{ + if (hw->mac.ops.id_led_init) + return hw->mac.ops.id_led_init(hw); + + return E1000_SUCCESS; +} + +/** + * e1000_led_on - Turn on SW controllable LED + * @hw: pointer to the HW structure + * + * Turns the SW defined LED on. This is a function pointer entry point + * called by drivers. + **/ +s32 e1000_led_on(struct e1000_hw *hw) +{ + if (hw->mac.ops.led_on) + return hw->mac.ops.led_on(hw); + + return E1000_SUCCESS; +} + +/** + * e1000_led_off - Turn off SW controllable LED + * @hw: pointer to the HW structure + * + * Turns the SW defined LED off. This is a function pointer entry point + * called by drivers. + **/ +s32 e1000_led_off(struct e1000_hw *hw) +{ + if (hw->mac.ops.led_off) + return hw->mac.ops.led_off(hw); + + return E1000_SUCCESS; +} + +/** + * e1000_reset_adaptive - Reset adaptive IFS + * @hw: pointer to the HW structure + * + * Resets the adaptive IFS. Currently no func pointer exists and all + * implementations are handled in the generic version of this function. + **/ +void e1000_reset_adaptive(struct e1000_hw *hw) +{ + e1000_reset_adaptive_generic(hw); +} + +/** + * e1000_update_adaptive - Update adaptive IFS + * @hw: pointer to the HW structure + * + * Updates adapter IFS. Currently no func pointer exists and all + * implementations are handled in the generic version of this function. + **/ +void e1000_update_adaptive(struct e1000_hw *hw) +{ + e1000_update_adaptive_generic(hw); +} + +/** + * e1000_disable_pcie_master - Disable PCI-Express master access + * @hw: pointer to the HW structure + * + * Disables PCI-Express master access and verifies there are no pending + * requests. Currently no func pointer exists and all implementations are + * handled in the generic version of this function. + **/ +s32 e1000_disable_pcie_master(struct e1000_hw *hw) +{ + return e1000_disable_pcie_master_generic(hw); +} + +/** + * e1000_config_collision_dist - Configure collision distance + * @hw: pointer to the HW structure + * + * Configures the collision distance to the default value and is used + * during link setup. + **/ +void e1000_config_collision_dist(struct e1000_hw *hw) +{ + if (hw->mac.ops.config_collision_dist) + hw->mac.ops.config_collision_dist(hw); +} + +/** + * e1000_rar_set - Sets a receive address register + * @hw: pointer to the HW structure + * @addr: address to set the RAR to + * @index: the RAR to set + * + * Sets a Receive Address Register (RAR) to the specified address. + **/ +void e1000_rar_set(struct e1000_hw *hw, u8 *addr, u32 index) +{ + if (hw->mac.ops.rar_set) + hw->mac.ops.rar_set(hw, addr, index); +} + +/** + * e1000_validate_mdi_setting - Ensures valid MDI/MDIX SW state + * @hw: pointer to the HW structure + * + * Ensures that the MDI/MDIX SW state is valid. + **/ +s32 e1000_validate_mdi_setting(struct e1000_hw *hw) +{ + if (hw->mac.ops.validate_mdi_setting) + return hw->mac.ops.validate_mdi_setting(hw); + + return E1000_SUCCESS; +} + +/** + * e1000_hash_mc_addr - Determines address location in multicast table + * @hw: pointer to the HW structure + * @mc_addr: Multicast address to hash. + * + * This hashes an address to determine its location in the multicast + * table. Currently no func pointer exists and all implementations + * are handled in the generic version of this function. + **/ +u32 e1000_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr) +{ + return e1000_hash_mc_addr_generic(hw, mc_addr); +} + +/** + * e1000_enable_tx_pkt_filtering - Enable packet filtering on TX + * @hw: pointer to the HW structure + * + * Enables packet filtering on transmit packets if manageability is enabled + * and host interface is enabled. + * Currently no func pointer exists and all implementations are handled in the + * generic version of this function. + **/ +bool e1000_enable_tx_pkt_filtering(struct e1000_hw *hw) +{ + return e1000_enable_tx_pkt_filtering_generic(hw); +} + +/** + * e1000_mng_host_if_write - Writes to the manageability host interface + * @hw: pointer to the HW structure + * @buffer: pointer to the host interface buffer + * @length: size of the buffer + * @offset: location in the buffer to write to + * @sum: sum of the data (not checksum) + * + * This function writes the buffer content at the offset given on the host if. + * It also does alignment considerations to do the writes in most efficient + * way. Also fills up the sum of the buffer in *buffer parameter. + **/ +s32 e1000_mng_host_if_write(struct e1000_hw *hw, u8 *buffer, u16 length, + u16 offset, u8 *sum) +{ + return e1000_mng_host_if_write_generic(hw, buffer, length, offset, sum); +} + +/** + * e1000_mng_write_cmd_header - Writes manageability command header + * @hw: pointer to the HW structure + * @hdr: pointer to the host interface command header + * + * Writes the command header after does the checksum calculation. + **/ +s32 e1000_mng_write_cmd_header(struct e1000_hw *hw, + struct e1000_host_mng_command_header *hdr) +{ + return e1000_mng_write_cmd_header_generic(hw, hdr); +} + +/** + * e1000_mng_enable_host_if - Checks host interface is enabled + * @hw: pointer to the HW structure + * + * Returns E1000_success upon success, else E1000_ERR_HOST_INTERFACE_COMMAND + * + * This function checks whether the HOST IF is enabled for command operation + * and also checks whether the previous command is completed. It busy waits + * in case of previous command is not completed. + **/ +s32 e1000_mng_enable_host_if(struct e1000_hw *hw) +{ + return e1000_mng_enable_host_if_generic(hw); +} + +/** + * e1000_check_reset_block - Verifies PHY can be reset + * @hw: pointer to the HW structure + * + * Checks if the PHY is in a state that can be reset or if manageability + * has it tied up. This is a function pointer entry point called by drivers. + **/ +s32 e1000_check_reset_block(struct e1000_hw *hw) +{ + if (hw->phy.ops.check_reset_block) + return hw->phy.ops.check_reset_block(hw); + + return E1000_SUCCESS; +} + +/** + * e1000_read_phy_reg - Reads PHY register + * @hw: pointer to the HW structure + * @offset: the register to read + * @data: the buffer to store the 16-bit read. + * + * Reads the PHY register and returns the value in data. + * This is a function pointer entry point called by drivers. + **/ +s32 e1000_read_phy_reg(struct e1000_hw *hw, u32 offset, u16 *data) +{ + if (hw->phy.ops.read_reg) + return hw->phy.ops.read_reg(hw, offset, data); + + return E1000_SUCCESS; +} + +/** + * e1000_write_phy_reg - Writes PHY register + * @hw: pointer to the HW structure + * @offset: the register to write + * @data: the value to write. + * + * Writes the PHY register at offset with the value in data. + * This is a function pointer entry point called by drivers. + **/ +s32 e1000_write_phy_reg(struct e1000_hw *hw, u32 offset, u16 data) +{ + if (hw->phy.ops.write_reg) + return hw->phy.ops.write_reg(hw, offset, data); + + return E1000_SUCCESS; +} + +/** + * e1000_release_phy - Generic release PHY + * @hw: pointer to the HW structure + * + * Return if silicon family does not require a semaphore when accessing the + * PHY. + **/ +void e1000_release_phy(struct e1000_hw *hw) +{ + if (hw->phy.ops.release) + hw->phy.ops.release(hw); +} + +/** + * e1000_acquire_phy - Generic acquire PHY + * @hw: pointer to the HW structure + * + * Return success if silicon family does not require a semaphore when + * accessing the PHY. + **/ +s32 e1000_acquire_phy(struct e1000_hw *hw) +{ + if (hw->phy.ops.acquire) + return hw->phy.ops.acquire(hw); + + return E1000_SUCCESS; +} + +/** + * e1000_read_kmrn_reg - Reads register using Kumeran interface + * @hw: pointer to the HW structure + * @offset: the register to read + * @data: the location to store the 16-bit value read. + * + * Reads a register out of the Kumeran interface. Currently no func pointer + * exists and all implementations are handled in the generic version of + * this function. + **/ +s32 e1000_read_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 *data) +{ + return e1000_read_kmrn_reg_generic(hw, offset, data); +} + +/** + * e1000_write_kmrn_reg - Writes register using Kumeran interface + * @hw: pointer to the HW structure + * @offset: the register to write + * @data: the value to write. + * + * Writes a register to the Kumeran interface. Currently no func pointer + * exists and all implementations are handled in the generic version of + * this function. + **/ +s32 e1000_write_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 data) +{ + return e1000_write_kmrn_reg_generic(hw, offset, data); +} + +/** + * e1000_get_cable_length - Retrieves cable length estimation + * @hw: pointer to the HW structure + * + * This function estimates the cable length and stores them in + * hw->phy.min_length and hw->phy.max_length. This is a function pointer + * entry point called by drivers. + **/ +s32 e1000_get_cable_length(struct e1000_hw *hw) +{ + if (hw->phy.ops.get_cable_length) + return hw->phy.ops.get_cable_length(hw); + + return E1000_SUCCESS; +} + +/** + * e1000_get_phy_info - Retrieves PHY information from registers + * @hw: pointer to the HW structure + * + * This function gets some information from various PHY registers and + * populates hw->phy values with it. This is a function pointer entry + * point called by drivers. + **/ +s32 e1000_get_phy_info(struct e1000_hw *hw) +{ + if (hw->phy.ops.get_info) + return hw->phy.ops.get_info(hw); + + return E1000_SUCCESS; +} + +/** + * e1000_phy_hw_reset - Hard PHY reset + * @hw: pointer to the HW structure + * + * Performs a hard PHY reset. This is a function pointer entry point called + * by drivers. + **/ +s32 e1000_phy_hw_reset(struct e1000_hw *hw) +{ + if (hw->phy.ops.reset) + return hw->phy.ops.reset(hw); + + return E1000_SUCCESS; +} + +/** + * e1000_phy_commit - Soft PHY reset + * @hw: pointer to the HW structure + * + * Performs a soft PHY reset on those that apply. This is a function pointer + * entry point called by drivers. + **/ +s32 e1000_phy_commit(struct e1000_hw *hw) +{ + if (hw->phy.ops.commit) + return hw->phy.ops.commit(hw); + + return E1000_SUCCESS; +} + +/** + * e1000_set_d0_lplu_state - Sets low power link up state for D0 + * @hw: pointer to the HW structure + * @active: boolean used to enable/disable lplu + * + * Success returns 0, Failure returns 1 + * + * The low power link up (lplu) state is set to the power management level D0 + * and SmartSpeed is disabled when active is true, else clear lplu for D0 + * and enable Smartspeed. LPLU and Smartspeed are mutually exclusive. LPLU + * is used during Dx states where the power conservation is most important. + * During driver activity, SmartSpeed should be enabled so performance is + * maintained. This is a function pointer entry point called by drivers. + **/ +s32 e1000_set_d0_lplu_state(struct e1000_hw *hw, bool active) +{ + if (hw->phy.ops.set_d0_lplu_state) + return hw->phy.ops.set_d0_lplu_state(hw, active); + + return E1000_SUCCESS; +} + +/** + * e1000_set_d3_lplu_state - Sets low power link up state for D3 + * @hw: pointer to the HW structure + * @active: boolean used to enable/disable lplu + * + * Success returns 0, Failure returns 1 + * + * The low power link up (lplu) state is set to the power management level D3 + * and SmartSpeed is disabled when active is true, else clear lplu for D3 + * and enable Smartspeed. LPLU and Smartspeed are mutually exclusive. LPLU + * is used during Dx states where the power conservation is most important. + * During driver activity, SmartSpeed should be enabled so performance is + * maintained. This is a function pointer entry point called by drivers. + **/ +s32 e1000_set_d3_lplu_state(struct e1000_hw *hw, bool active) +{ + if (hw->phy.ops.set_d3_lplu_state) + return hw->phy.ops.set_d3_lplu_state(hw, active); + + return E1000_SUCCESS; +} + +/** + * e1000_read_mac_addr - Reads MAC address + * @hw: pointer to the HW structure + * + * Reads the MAC address out of the adapter and stores it in the HW structure. + * Currently no func pointer exists and all implementations are handled in the + * generic version of this function. + **/ +s32 e1000_read_mac_addr(struct e1000_hw *hw) +{ + if (hw->mac.ops.read_mac_addr) + return hw->mac.ops.read_mac_addr(hw); + + return e1000_read_mac_addr_generic(hw); +} + +/** + * e1000_read_pba_string - Read device part number string + * @hw: pointer to the HW structure + * @pba_num: pointer to device part number + * @pba_num_size: size of part number buffer + * + * Reads the product board assembly (PBA) number from the EEPROM and stores + * the value in pba_num. + * Currently no func pointer exists and all implementations are handled in the + * generic version of this function. + **/ +s32 e1000_read_pba_string(struct e1000_hw *hw, u8 *pba_num, u32 pba_num_size) +{ + return e1000_read_pba_string_generic(hw, pba_num, pba_num_size); +} + +/** + * e1000_read_pba_length - Read device part number string length + * @hw: pointer to the HW structure + * @pba_num_size: size of part number buffer + * + * Reads the product board assembly (PBA) number length from the EEPROM and + * stores the value in pba_num. + * Currently no func pointer exists and all implementations are handled in the + * generic version of this function. + **/ +s32 e1000_read_pba_length(struct e1000_hw *hw, u32 *pba_num_size) +{ + return e1000_read_pba_length_generic(hw, pba_num_size); +} + +/** + * e1000_validate_nvm_checksum - Verifies NVM (EEPROM) checksum + * @hw: pointer to the HW structure + * + * Validates the NVM checksum is correct. This is a function pointer entry + * point called by drivers. + **/ +s32 e1000_validate_nvm_checksum(struct e1000_hw *hw) +{ + if (hw->nvm.ops.validate) + return hw->nvm.ops.validate(hw); + + return -E1000_ERR_CONFIG; +} + +/** + * e1000_update_nvm_checksum - Updates NVM (EEPROM) checksum + * @hw: pointer to the HW structure + * + * Updates the NVM checksum. Currently no func pointer exists and all + * implementations are handled in the generic version of this function. + **/ +s32 e1000_update_nvm_checksum(struct e1000_hw *hw) +{ + if (hw->nvm.ops.update) + return hw->nvm.ops.update(hw); + + return -E1000_ERR_CONFIG; +} + +/** + * e1000_reload_nvm - Reloads EEPROM + * @hw: pointer to the HW structure + * + * Reloads the EEPROM by setting the "Reinitialize from EEPROM" bit in the + * extended control register. + **/ +void e1000_reload_nvm(struct e1000_hw *hw) +{ + if (hw->nvm.ops.reload) + hw->nvm.ops.reload(hw); +} + +/** + * e1000_read_nvm - Reads NVM (EEPROM) + * @hw: pointer to the HW structure + * @offset: the word offset to read + * @words: number of 16-bit words to read + * @data: pointer to the properly sized buffer for the data. + * + * Reads 16-bit chunks of data from the NVM (EEPROM). This is a function + * pointer entry point called by drivers. + **/ +s32 e1000_read_nvm(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) +{ + if (hw->nvm.ops.read) + return hw->nvm.ops.read(hw, offset, words, data); + + return -E1000_ERR_CONFIG; +} + +/** + * e1000_write_nvm - Writes to NVM (EEPROM) + * @hw: pointer to the HW structure + * @offset: the word offset to read + * @words: number of 16-bit words to write + * @data: pointer to the properly sized buffer for the data. + * + * Writes 16-bit chunks of data to the NVM (EEPROM). This is a function + * pointer entry point called by drivers. + **/ +s32 e1000_write_nvm(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) +{ + if (hw->nvm.ops.write) + return hw->nvm.ops.write(hw, offset, words, data); + + return E1000_SUCCESS; +} + +/** + * e1000_write_8bit_ctrl_reg - Writes 8bit Control register + * @hw: pointer to the HW structure + * @reg: 32bit register offset + * @offset: the register to write + * @data: the value to write. + * + * Writes the PHY register at offset with the value in data. + * This is a function pointer entry point called by drivers. + **/ +s32 e1000_write_8bit_ctrl_reg(struct e1000_hw *hw, u32 reg, u32 offset, + u8 data) +{ + return e1000_write_8bit_ctrl_reg_generic(hw, reg, offset, data); +} + +/** + * e1000_power_up_phy - Restores link in case of PHY power down + * @hw: pointer to the HW structure + * + * The phy may be powered down to save power, to turn off link when the + * driver is unloaded, or wake on lan is not enabled (among others). + **/ +void e1000_power_up_phy(struct e1000_hw *hw) +{ + if (hw->phy.ops.power_up) + hw->phy.ops.power_up(hw); + + e1000_setup_link(hw); +} + +/** + * e1000_power_down_phy - Power down PHY + * @hw: pointer to the HW structure + * + * The phy may be powered down to save power, to turn off link when the + * driver is unloaded, or wake on lan is not enabled (among others). + **/ +void e1000_power_down_phy(struct e1000_hw *hw) +{ + if (hw->phy.ops.power_down) + hw->phy.ops.power_down(hw); +} + +/** + * e1000_power_up_fiber_serdes_link - Power up serdes link + * @hw: pointer to the HW structure + * + * Power on the optics and PCS. + **/ +void e1000_power_up_fiber_serdes_link(struct e1000_hw *hw) +{ + if (hw->mac.ops.power_up_serdes) + hw->mac.ops.power_up_serdes(hw); +} + +/** + * e1000_shutdown_fiber_serdes_link - Remove link during power down + * @hw: pointer to the HW structure + * + * Shutdown the optics and PCS on driver unload. + **/ +void e1000_shutdown_fiber_serdes_link(struct e1000_hw *hw) +{ + if (hw->mac.ops.shutdown_serdes) + hw->mac.ops.shutdown_serdes(hw); +} + +/** + * e1000_get_thermal_sensor_data - Gathers thermal sensor data + * @hw: pointer to hardware structure + * + * Updates the temperatures in mac.thermal_sensor_data + **/ +s32 e1000_get_thermal_sensor_data(struct e1000_hw *hw) +{ + if (hw->mac.ops.get_thermal_sensor_data) + return hw->mac.ops.get_thermal_sensor_data(hw); + + return E1000_SUCCESS; +} + +/** + * e1000_init_thermal_sensor_thresh - Sets thermal sensor thresholds + * @hw: pointer to hardware structure + * + * Sets the thermal sensor thresholds according to the NVM map + **/ +s32 e1000_init_thermal_sensor_thresh(struct e1000_hw *hw) +{ + if (hw->mac.ops.init_thermal_sensor_thresh) + return hw->mac.ops.init_thermal_sensor_thresh(hw); + + return E1000_SUCCESS; +} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h new file mode 100644 index 00000000..b21294ec --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_api.h @@ -0,0 +1,157 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _E1000_API_H_ +#define _E1000_API_H_ + +#include "e1000_hw.h" + +extern void e1000_init_function_pointers_82575(struct e1000_hw *hw); +extern void e1000_rx_fifo_flush_82575(struct e1000_hw *hw); +extern void e1000_init_function_pointers_vf(struct e1000_hw *hw); +extern void e1000_power_up_fiber_serdes_link(struct e1000_hw *hw); +extern void e1000_shutdown_fiber_serdes_link(struct e1000_hw *hw); +extern void e1000_init_function_pointers_i210(struct e1000_hw *hw); + +s32 e1000_set_obff_timer(struct e1000_hw *hw, u32 itr); +s32 e1000_set_mac_type(struct e1000_hw *hw); +s32 e1000_setup_init_funcs(struct e1000_hw *hw, bool init_device); +s32 e1000_init_mac_params(struct e1000_hw *hw); +s32 e1000_init_nvm_params(struct e1000_hw *hw); +s32 e1000_init_phy_params(struct e1000_hw *hw); +s32 e1000_init_mbx_params(struct e1000_hw *hw); +s32 e1000_get_bus_info(struct e1000_hw *hw); +void e1000_clear_vfta(struct e1000_hw *hw); +void e1000_write_vfta(struct e1000_hw *hw, u32 offset, u32 value); +s32 e1000_force_mac_fc(struct e1000_hw *hw); +s32 e1000_check_for_link(struct e1000_hw *hw); +s32 e1000_reset_hw(struct e1000_hw *hw); +s32 e1000_init_hw(struct e1000_hw *hw); +s32 e1000_setup_link(struct e1000_hw *hw); +s32 e1000_get_speed_and_duplex(struct e1000_hw *hw, u16 *speed, u16 *duplex); +s32 e1000_disable_pcie_master(struct e1000_hw *hw); +void e1000_config_collision_dist(struct e1000_hw *hw); +void e1000_rar_set(struct e1000_hw *hw, u8 *addr, u32 index); +u32 e1000_hash_mc_addr(struct e1000_hw *hw, u8 *mc_addr); +void e1000_update_mc_addr_list(struct e1000_hw *hw, u8 *mc_addr_list, + u32 mc_addr_count); +s32 e1000_setup_led(struct e1000_hw *hw); +s32 e1000_cleanup_led(struct e1000_hw *hw); +s32 e1000_check_reset_block(struct e1000_hw *hw); +s32 e1000_blink_led(struct e1000_hw *hw); +s32 e1000_led_on(struct e1000_hw *hw); +s32 e1000_led_off(struct e1000_hw *hw); +s32 e1000_id_led_init(struct e1000_hw *hw); +void e1000_reset_adaptive(struct e1000_hw *hw); +void e1000_update_adaptive(struct e1000_hw *hw); +s32 e1000_get_cable_length(struct e1000_hw *hw); +s32 e1000_validate_mdi_setting(struct e1000_hw *hw); +s32 e1000_read_phy_reg(struct e1000_hw *hw, u32 offset, u16 *data); +s32 e1000_write_phy_reg(struct e1000_hw *hw, u32 offset, u16 data); +s32 e1000_write_8bit_ctrl_reg(struct e1000_hw *hw, u32 reg, u32 offset, + u8 data); +s32 e1000_get_phy_info(struct e1000_hw *hw); +void e1000_release_phy(struct e1000_hw *hw); +s32 e1000_acquire_phy(struct e1000_hw *hw); +s32 e1000_phy_hw_reset(struct e1000_hw *hw); +s32 e1000_phy_commit(struct e1000_hw *hw); +void e1000_power_up_phy(struct e1000_hw *hw); +void e1000_power_down_phy(struct e1000_hw *hw); +s32 e1000_read_mac_addr(struct e1000_hw *hw); +s32 e1000_read_pba_string(struct e1000_hw *hw, u8 *pba_num, u32 pba_num_size); +s32 e1000_read_pba_length(struct e1000_hw *hw, u32 *pba_num_size); +void e1000_reload_nvm(struct e1000_hw *hw); +s32 e1000_update_nvm_checksum(struct e1000_hw *hw); +s32 e1000_validate_nvm_checksum(struct e1000_hw *hw); +s32 e1000_read_nvm(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); +s32 e1000_read_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 *data); +s32 e1000_write_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 data); +s32 e1000_write_nvm(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); +s32 e1000_set_d3_lplu_state(struct e1000_hw *hw, bool active); +s32 e1000_set_d0_lplu_state(struct e1000_hw *hw, bool active); +bool e1000_check_mng_mode(struct e1000_hw *hw); +bool e1000_enable_tx_pkt_filtering(struct e1000_hw *hw); +s32 e1000_mng_enable_host_if(struct e1000_hw *hw); +s32 e1000_mng_host_if_write(struct e1000_hw *hw, u8 *buffer, u16 length, + u16 offset, u8 *sum); +s32 e1000_mng_write_cmd_header(struct e1000_hw *hw, + struct e1000_host_mng_command_header *hdr); +s32 e1000_mng_write_dhcp_info(struct e1000_hw *hw, u8 *buffer, u16 length); +s32 e1000_get_thermal_sensor_data(struct e1000_hw *hw); +s32 e1000_init_thermal_sensor_thresh(struct e1000_hw *hw); + + + +/* + * TBI_ACCEPT macro definition: + * + * This macro requires: + * adapter = a pointer to struct e1000_hw + * status = the 8 bit status field of the Rx descriptor with EOP set + * error = the 8 bit error field of the Rx descriptor with EOP set + * length = the sum of all the length fields of the Rx descriptors that + * make up the current frame + * last_byte = the last byte of the frame DMAed by the hardware + * max_frame_length = the maximum frame length we want to accept. + * min_frame_length = the minimum frame length we want to accept. + * + * This macro is a conditional that should be used in the interrupt + * handler's Rx processing routine when RxErrors have been detected. + * + * Typical use: + * ... + * if (TBI_ACCEPT) { + * accept_frame = true; + * e1000_tbi_adjust_stats(adapter, MacAddress); + * frame_length--; + * } else { + * accept_frame = false; + * } + * ... + */ + +/* The carrier extension symbol, as received by the NIC. */ +#define CARRIER_EXTENSION 0x0F + +#define TBI_ACCEPT(a, status, errors, length, last_byte, \ + min_frame_size, max_frame_size) \ + (e1000_tbi_sbp_enabled_82543(a) && \ + (((errors) & E1000_RXD_ERR_FRAME_ERR_MASK) == E1000_RXD_ERR_CE) && \ + ((last_byte) == CARRIER_EXTENSION) && \ + (((status) & E1000_RXD_STAT_VP) ? \ + (((length) > (min_frame_size - VLAN_TAG_SIZE)) && \ + ((length) <= (max_frame_size + 1))) : \ + (((length) > min_frame_size) && \ + ((length) <= (max_frame_size + VLAN_TAG_SIZE + 1))))) + +#ifndef E1000_MAX +#define E1000_MAX(a, b) ((a) > (b) ? (a) : (b)) +#endif +#ifndef E1000_DIVIDE_ROUND_UP +#define E1000_DIVIDE_ROUND_UP(a, b) (((a) + (b) - 1) / (b)) /* ceil(a/b) */ +#endif +#endif /* _E1000_API_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_defines.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_defines.h new file mode 100644 index 00000000..63b228c5 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_defines.h @@ -0,0 +1,1380 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _E1000_DEFINES_H_ +#define _E1000_DEFINES_H_ + +/* Number of Transmit and Receive Descriptors must be a multiple of 8 */ +#define REQ_TX_DESCRIPTOR_MULTIPLE 8 +#define REQ_RX_DESCRIPTOR_MULTIPLE 8 + +/* Definitions for power management and wakeup registers */ +/* Wake Up Control */ +#define E1000_WUC_APME 0x00000001 /* APM Enable */ +#define E1000_WUC_PME_EN 0x00000002 /* PME Enable */ +#define E1000_WUC_PME_STATUS 0x00000004 /* PME Status */ +#define E1000_WUC_APMPME 0x00000008 /* Assert PME on APM Wakeup */ +#define E1000_WUC_PHY_WAKE 0x00000100 /* if PHY supports wakeup */ + +/* Wake Up Filter Control */ +#define E1000_WUFC_LNKC 0x00000001 /* Link Status Change Wakeup Enable */ +#define E1000_WUFC_MAG 0x00000002 /* Magic Packet Wakeup Enable */ +#define E1000_WUFC_EX 0x00000004 /* Directed Exact Wakeup Enable */ +#define E1000_WUFC_MC 0x00000008 /* Directed Multicast Wakeup Enable */ +#define E1000_WUFC_BC 0x00000010 /* Broadcast Wakeup Enable */ +#define E1000_WUFC_ARP 0x00000020 /* ARP Request Packet Wakeup Enable */ +#define E1000_WUFC_IPV4 0x00000040 /* Directed IPv4 Packet Wakeup Enable */ +#define E1000_WUFC_FLX0 0x00010000 /* Flexible Filter 0 Enable */ + +/* Wake Up Status */ +#define E1000_WUS_LNKC E1000_WUFC_LNKC +#define E1000_WUS_MAG E1000_WUFC_MAG +#define E1000_WUS_EX E1000_WUFC_EX +#define E1000_WUS_MC E1000_WUFC_MC +#define E1000_WUS_BC E1000_WUFC_BC + +/* Extended Device Control */ +#define E1000_CTRL_EXT_SDP4_DATA 0x00000010 /* SW Definable Pin 4 data */ +#define E1000_CTRL_EXT_SDP6_DATA 0x00000040 /* SW Definable Pin 6 data */ +#define E1000_CTRL_EXT_SDP3_DATA 0x00000080 /* SW Definable Pin 3 data */ +#define E1000_CTRL_EXT_SDP6_DIR 0x00000400 /* Direction of SDP6 0=in 1=out */ +#define E1000_CTRL_EXT_SDP3_DIR 0x00000800 /* Direction of SDP3 0=in 1=out */ +#define E1000_CTRL_EXT_EE_RST 0x00002000 /* Reinitialize from EEPROM */ +/* Physical Func Reset Done Indication */ +#define E1000_CTRL_EXT_PFRSTD 0x00004000 +#define E1000_CTRL_EXT_SPD_BYPS 0x00008000 /* Speed Select Bypass */ +#define E1000_CTRL_EXT_RO_DIS 0x00020000 /* Relaxed Ordering disable */ +#define E1000_CTRL_EXT_DMA_DYN_CLK_EN 0x00080000 /* DMA Dynamic Clk Gating */ +#define E1000_CTRL_EXT_LINK_MODE_MASK 0x00C00000 +/* Offset of the link mode field in Ctrl Ext register */ +#define E1000_CTRL_EXT_LINK_MODE_OFFSET 22 +#define E1000_CTRL_EXT_LINK_MODE_1000BASE_KX 0x00400000 +#define E1000_CTRL_EXT_LINK_MODE_GMII 0x00000000 +#define E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES 0x00C00000 +#define E1000_CTRL_EXT_LINK_MODE_SGMII 0x00800000 +#define E1000_CTRL_EXT_EIAME 0x01000000 +#define E1000_CTRL_EXT_IRCA 0x00000001 +#define E1000_CTRL_EXT_DRV_LOAD 0x10000000 /* Drv loaded bit for FW */ +#define E1000_CTRL_EXT_IAME 0x08000000 /* Int ACK Auto-mask */ +#define E1000_CTRL_EXT_PBA_CLR 0x80000000 /* PBA Clear */ +#define E1000_I2CCMD_REG_ADDR_SHIFT 16 +#define E1000_I2CCMD_PHY_ADDR_SHIFT 24 +#define E1000_I2CCMD_OPCODE_READ 0x08000000 +#define E1000_I2CCMD_OPCODE_WRITE 0x00000000 +#define E1000_I2CCMD_READY 0x20000000 +#define E1000_I2CCMD_ERROR 0x80000000 +#define E1000_I2CCMD_SFP_DATA_ADDR(a) (0x0000 + (a)) +#define E1000_I2CCMD_SFP_DIAG_ADDR(a) (0x0100 + (a)) +#define E1000_MAX_SGMII_PHY_REG_ADDR 255 +#define E1000_I2CCMD_PHY_TIMEOUT 200 +#define E1000_IVAR_VALID 0x80 +#define E1000_GPIE_NSICR 0x00000001 +#define E1000_GPIE_MSIX_MODE 0x00000010 +#define E1000_GPIE_EIAME 0x40000000 +#define E1000_GPIE_PBA 0x80000000 + +/* Receive Descriptor bit definitions */ +#define E1000_RXD_STAT_DD 0x01 /* Descriptor Done */ +#define E1000_RXD_STAT_EOP 0x02 /* End of Packet */ +#define E1000_RXD_STAT_IXSM 0x04 /* Ignore checksum */ +#define E1000_RXD_STAT_VP 0x08 /* IEEE VLAN Packet */ +#define E1000_RXD_STAT_UDPCS 0x10 /* UDP xsum calculated */ +#define E1000_RXD_STAT_TCPCS 0x20 /* TCP xsum calculated */ +#define E1000_RXD_STAT_IPCS 0x40 /* IP xsum calculated */ +#define E1000_RXD_STAT_PIF 0x80 /* passed in-exact filter */ +#define E1000_RXD_STAT_IPIDV 0x200 /* IP identification valid */ +#define E1000_RXD_STAT_UDPV 0x400 /* Valid UDP checksum */ +#define E1000_RXD_STAT_DYNINT 0x800 /* Pkt caused INT via DYNINT */ +#define E1000_RXD_ERR_CE 0x01 /* CRC Error */ +#define E1000_RXD_ERR_SE 0x02 /* Symbol Error */ +#define E1000_RXD_ERR_SEQ 0x04 /* Sequence Error */ +#define E1000_RXD_ERR_CXE 0x10 /* Carrier Extension Error */ +#define E1000_RXD_ERR_TCPE 0x20 /* TCP/UDP Checksum Error */ +#define E1000_RXD_ERR_IPE 0x40 /* IP Checksum Error */ +#define E1000_RXD_ERR_RXE 0x80 /* Rx Data Error */ +#define E1000_RXD_SPC_VLAN_MASK 0x0FFF /* VLAN ID is in lower 12 bits */ + +#define E1000_RXDEXT_STATERR_TST 0x00000100 /* Time Stamp taken */ +#define E1000_RXDEXT_STATERR_LB 0x00040000 +#define E1000_RXDEXT_STATERR_CE 0x01000000 +#define E1000_RXDEXT_STATERR_SE 0x02000000 +#define E1000_RXDEXT_STATERR_SEQ 0x04000000 +#define E1000_RXDEXT_STATERR_CXE 0x10000000 +#define E1000_RXDEXT_STATERR_TCPE 0x20000000 +#define E1000_RXDEXT_STATERR_IPE 0x40000000 +#define E1000_RXDEXT_STATERR_RXE 0x80000000 + +/* mask to determine if packets should be dropped due to frame errors */ +#define E1000_RXD_ERR_FRAME_ERR_MASK ( \ + E1000_RXD_ERR_CE | \ + E1000_RXD_ERR_SE | \ + E1000_RXD_ERR_SEQ | \ + E1000_RXD_ERR_CXE | \ + E1000_RXD_ERR_RXE) + +/* Same mask, but for extended and packet split descriptors */ +#define E1000_RXDEXT_ERR_FRAME_ERR_MASK ( \ + E1000_RXDEXT_STATERR_CE | \ + E1000_RXDEXT_STATERR_SE | \ + E1000_RXDEXT_STATERR_SEQ | \ + E1000_RXDEXT_STATERR_CXE | \ + E1000_RXDEXT_STATERR_RXE) + +#define E1000_MRQC_RSS_FIELD_MASK 0xFFFF0000 +#define E1000_MRQC_RSS_FIELD_IPV4_TCP 0x00010000 +#define E1000_MRQC_RSS_FIELD_IPV4 0x00020000 +#define E1000_MRQC_RSS_FIELD_IPV6_TCP_EX 0x00040000 +#define E1000_MRQC_RSS_FIELD_IPV6 0x00100000 +#define E1000_MRQC_RSS_FIELD_IPV6_TCP 0x00200000 + +#define E1000_RXDPS_HDRSTAT_HDRSP 0x00008000 + +/* Management Control */ +#define E1000_MANC_SMBUS_EN 0x00000001 /* SMBus Enabled - RO */ +#define E1000_MANC_ASF_EN 0x00000002 /* ASF Enabled - RO */ +#define E1000_MANC_ARP_EN 0x00002000 /* Enable ARP Request Filtering */ +#define E1000_MANC_RCV_TCO_EN 0x00020000 /* Receive TCO Packets Enabled */ +#define E1000_MANC_BLK_PHY_RST_ON_IDE 0x00040000 /* Block phy resets */ +/* Enable MAC address filtering */ +#define E1000_MANC_EN_MAC_ADDR_FILTER 0x00100000 +/* Enable MNG packets to host memory */ +#define E1000_MANC_EN_MNG2HOST 0x00200000 + +#define E1000_MANC2H_PORT_623 0x00000020 /* Port 0x26f */ +#define E1000_MANC2H_PORT_664 0x00000040 /* Port 0x298 */ +#define E1000_MDEF_PORT_623 0x00000800 /* Port 0x26f */ +#define E1000_MDEF_PORT_664 0x00000400 /* Port 0x298 */ + +/* Receive Control */ +#define E1000_RCTL_RST 0x00000001 /* Software reset */ +#define E1000_RCTL_EN 0x00000002 /* enable */ +#define E1000_RCTL_SBP 0x00000004 /* store bad packet */ +#define E1000_RCTL_UPE 0x00000008 /* unicast promisc enable */ +#define E1000_RCTL_MPE 0x00000010 /* multicast promisc enable */ +#define E1000_RCTL_LPE 0x00000020 /* long packet enable */ +#define E1000_RCTL_LBM_NO 0x00000000 /* no loopback mode */ +#define E1000_RCTL_LBM_MAC 0x00000040 /* MAC loopback mode */ +#define E1000_RCTL_LBM_TCVR 0x000000C0 /* tcvr loopback mode */ +#define E1000_RCTL_DTYP_PS 0x00000400 /* Packet Split descriptor */ +#define E1000_RCTL_RDMTS_HALF 0x00000000 /* Rx desc min thresh size */ +#define E1000_RCTL_MO_SHIFT 12 /* multicast offset shift */ +#define E1000_RCTL_MO_3 0x00003000 /* multicast offset 15:4 */ +#define E1000_RCTL_BAM 0x00008000 /* broadcast enable */ +/* these buffer sizes are valid if E1000_RCTL_BSEX is 0 */ +#define E1000_RCTL_SZ_2048 0x00000000 /* Rx buffer size 2048 */ +#define E1000_RCTL_SZ_1024 0x00010000 /* Rx buffer size 1024 */ +#define E1000_RCTL_SZ_512 0x00020000 /* Rx buffer size 512 */ +#define E1000_RCTL_SZ_256 0x00030000 /* Rx buffer size 256 */ +/* these buffer sizes are valid if E1000_RCTL_BSEX is 1 */ +#define E1000_RCTL_SZ_16384 0x00010000 /* Rx buffer size 16384 */ +#define E1000_RCTL_SZ_8192 0x00020000 /* Rx buffer size 8192 */ +#define E1000_RCTL_SZ_4096 0x00030000 /* Rx buffer size 4096 */ +#define E1000_RCTL_VFE 0x00040000 /* vlan filter enable */ +#define E1000_RCTL_CFIEN 0x00080000 /* canonical form enable */ +#define E1000_RCTL_CFI 0x00100000 /* canonical form indicator */ +#define E1000_RCTL_DPF 0x00400000 /* discard pause frames */ +#define E1000_RCTL_PMCF 0x00800000 /* pass MAC control frames */ +#define E1000_RCTL_BSEX 0x02000000 /* Buffer size extension */ +#define E1000_RCTL_SECRC 0x04000000 /* Strip Ethernet CRC */ + +/* Use byte values for the following shift parameters + * Usage: + * psrctl |= (((ROUNDUP(value0, 128) >> E1000_PSRCTL_BSIZE0_SHIFT) & + * E1000_PSRCTL_BSIZE0_MASK) | + * ((ROUNDUP(value1, 1024) >> E1000_PSRCTL_BSIZE1_SHIFT) & + * E1000_PSRCTL_BSIZE1_MASK) | + * ((ROUNDUP(value2, 1024) << E1000_PSRCTL_BSIZE2_SHIFT) & + * E1000_PSRCTL_BSIZE2_MASK) | + * ((ROUNDUP(value3, 1024) << E1000_PSRCTL_BSIZE3_SHIFT) |; + * E1000_PSRCTL_BSIZE3_MASK)) + * where value0 = [128..16256], default=256 + * value1 = [1024..64512], default=4096 + * value2 = [0..64512], default=4096 + * value3 = [0..64512], default=0 + */ + +#define E1000_PSRCTL_BSIZE0_MASK 0x0000007F +#define E1000_PSRCTL_BSIZE1_MASK 0x00003F00 +#define E1000_PSRCTL_BSIZE2_MASK 0x003F0000 +#define E1000_PSRCTL_BSIZE3_MASK 0x3F000000 + +#define E1000_PSRCTL_BSIZE0_SHIFT 7 /* Shift _right_ 7 */ +#define E1000_PSRCTL_BSIZE1_SHIFT 2 /* Shift _right_ 2 */ +#define E1000_PSRCTL_BSIZE2_SHIFT 6 /* Shift _left_ 6 */ +#define E1000_PSRCTL_BSIZE3_SHIFT 14 /* Shift _left_ 14 */ + +/* SWFW_SYNC Definitions */ +#define E1000_SWFW_EEP_SM 0x01 +#define E1000_SWFW_PHY0_SM 0x02 +#define E1000_SWFW_PHY1_SM 0x04 +#define E1000_SWFW_CSR_SM 0x08 +#define E1000_SWFW_PHY2_SM 0x20 +#define E1000_SWFW_PHY3_SM 0x40 +#define E1000_SWFW_SW_MNG_SM 0x400 + +/* Device Control */ +#define E1000_CTRL_FD 0x00000001 /* Full duplex.0=half; 1=full */ +#define E1000_CTRL_PRIOR 0x00000004 /* Priority on PCI. 0=rx,1=fair */ +#define E1000_CTRL_GIO_MASTER_DISABLE 0x00000004 /*Blocks new Master reqs */ +#define E1000_CTRL_LRST 0x00000008 /* Link reset. 0=normal,1=reset */ +#define E1000_CTRL_ASDE 0x00000020 /* Auto-speed detect enable */ +#define E1000_CTRL_SLU 0x00000040 /* Set link up (Force Link) */ +#define E1000_CTRL_ILOS 0x00000080 /* Invert Loss-Of Signal */ +#define E1000_CTRL_SPD_SEL 0x00000300 /* Speed Select Mask */ +#define E1000_CTRL_SPD_10 0x00000000 /* Force 10Mb */ +#define E1000_CTRL_SPD_100 0x00000100 /* Force 100Mb */ +#define E1000_CTRL_SPD_1000 0x00000200 /* Force 1Gb */ +#define E1000_CTRL_FRCSPD 0x00000800 /* Force Speed */ +#define E1000_CTRL_FRCDPX 0x00001000 /* Force Duplex */ +#define E1000_CTRL_SWDPIN0 0x00040000 /* SWDPIN 0 value */ +#define E1000_CTRL_SWDPIN1 0x00080000 /* SWDPIN 1 value */ +#define E1000_CTRL_SWDPIN2 0x00100000 /* SWDPIN 2 value */ +#define E1000_CTRL_ADVD3WUC 0x00100000 /* D3 WUC */ +#define E1000_CTRL_SWDPIN3 0x00200000 /* SWDPIN 3 value */ +#define E1000_CTRL_SWDPIO0 0x00400000 /* SWDPIN 0 Input or output */ +#define E1000_CTRL_RST 0x04000000 /* Global reset */ +#define E1000_CTRL_RFCE 0x08000000 /* Receive Flow Control enable */ +#define E1000_CTRL_TFCE 0x10000000 /* Transmit flow control enable */ +#define E1000_CTRL_VME 0x40000000 /* IEEE VLAN mode enable */ +#define E1000_CTRL_PHY_RST 0x80000000 /* PHY Reset */ +#define E1000_CTRL_I2C_ENA 0x02000000 /* I2C enable */ + + +#define E1000_CONNSW_ENRGSRC 0x4 +#define E1000_CONNSW_PHYSD 0x400 +#define E1000_CONNSW_PHY_PDN 0x800 +#define E1000_CONNSW_SERDESD 0x200 +#define E1000_CONNSW_AUTOSENSE_CONF 0x2 +#define E1000_CONNSW_AUTOSENSE_EN 0x1 +#define E1000_PCS_CFG_PCS_EN 8 +#define E1000_PCS_LCTL_FLV_LINK_UP 1 +#define E1000_PCS_LCTL_FSV_10 0 +#define E1000_PCS_LCTL_FSV_100 2 +#define E1000_PCS_LCTL_FSV_1000 4 +#define E1000_PCS_LCTL_FDV_FULL 8 +#define E1000_PCS_LCTL_FSD 0x10 +#define E1000_PCS_LCTL_FORCE_LINK 0x20 +#define E1000_PCS_LCTL_FORCE_FCTRL 0x80 +#define E1000_PCS_LCTL_AN_ENABLE 0x10000 +#define E1000_PCS_LCTL_AN_RESTART 0x20000 +#define E1000_PCS_LCTL_AN_TIMEOUT 0x40000 +#define E1000_ENABLE_SERDES_LOOPBACK 0x0410 + +#define E1000_PCS_LSTS_LINK_OK 1 +#define E1000_PCS_LSTS_SPEED_100 2 +#define E1000_PCS_LSTS_SPEED_1000 4 +#define E1000_PCS_LSTS_DUPLEX_FULL 8 +#define E1000_PCS_LSTS_SYNK_OK 0x10 +#define E1000_PCS_LSTS_AN_COMPLETE 0x10000 + +/* Device Status */ +#define E1000_STATUS_FD 0x00000001 /* Duplex 0=half 1=full */ +#define E1000_STATUS_LU 0x00000002 /* Link up.0=no,1=link */ +#define E1000_STATUS_FUNC_MASK 0x0000000C /* PCI Function Mask */ +#define E1000_STATUS_FUNC_SHIFT 2 +#define E1000_STATUS_FUNC_1 0x00000004 /* Function 1 */ +#define E1000_STATUS_TXOFF 0x00000010 /* transmission paused */ +#define E1000_STATUS_SPEED_MASK 0x000000C0 +#define E1000_STATUS_SPEED_10 0x00000000 /* Speed 10Mb/s */ +#define E1000_STATUS_SPEED_100 0x00000040 /* Speed 100Mb/s */ +#define E1000_STATUS_SPEED_1000 0x00000080 /* Speed 1000Mb/s */ +#define E1000_STATUS_LAN_INIT_DONE 0x00000200 /* Lan Init Compltn by NVM */ +#define E1000_STATUS_PHYRA 0x00000400 /* PHY Reset Asserted */ +#define E1000_STATUS_GIO_MASTER_ENABLE 0x00080000 /* Master request status */ +#define E1000_STATUS_2P5_SKU 0x00001000 /* Val of 2.5GBE SKU strap */ +#define E1000_STATUS_2P5_SKU_OVER 0x00002000 /* Val of 2.5GBE SKU Over */ + +#define SPEED_10 10 +#define SPEED_100 100 +#define SPEED_1000 1000 +#define SPEED_2500 2500 +#define HALF_DUPLEX 1 +#define FULL_DUPLEX 2 + + +#define ADVERTISE_10_HALF 0x0001 +#define ADVERTISE_10_FULL 0x0002 +#define ADVERTISE_100_HALF 0x0004 +#define ADVERTISE_100_FULL 0x0008 +#define ADVERTISE_1000_HALF 0x0010 /* Not used, just FYI */ +#define ADVERTISE_1000_FULL 0x0020 + +/* 1000/H is not supported, nor spec-compliant. */ +#define E1000_ALL_SPEED_DUPLEX ( \ + ADVERTISE_10_HALF | ADVERTISE_10_FULL | ADVERTISE_100_HALF | \ + ADVERTISE_100_FULL | ADVERTISE_1000_FULL) +#define E1000_ALL_NOT_GIG ( \ + ADVERTISE_10_HALF | ADVERTISE_10_FULL | ADVERTISE_100_HALF | \ + ADVERTISE_100_FULL) +#define E1000_ALL_100_SPEED (ADVERTISE_100_HALF | ADVERTISE_100_FULL) +#define E1000_ALL_10_SPEED (ADVERTISE_10_HALF | ADVERTISE_10_FULL) +#define E1000_ALL_HALF_DUPLEX (ADVERTISE_10_HALF | ADVERTISE_100_HALF) + +#define AUTONEG_ADVERTISE_SPEED_DEFAULT E1000_ALL_SPEED_DUPLEX + +/* LED Control */ +#define E1000_LEDCTL_LED0_MODE_MASK 0x0000000F +#define E1000_LEDCTL_LED0_MODE_SHIFT 0 +#define E1000_LEDCTL_LED0_IVRT 0x00000040 +#define E1000_LEDCTL_LED0_BLINK 0x00000080 + +#define E1000_LEDCTL_MODE_LED_ON 0xE +#define E1000_LEDCTL_MODE_LED_OFF 0xF + +/* Transmit Descriptor bit definitions */ +#define E1000_TXD_DTYP_D 0x00100000 /* Data Descriptor */ +#define E1000_TXD_DTYP_C 0x00000000 /* Context Descriptor */ +#define E1000_TXD_POPTS_IXSM 0x01 /* Insert IP checksum */ +#define E1000_TXD_POPTS_TXSM 0x02 /* Insert TCP/UDP checksum */ +#define E1000_TXD_CMD_EOP 0x01000000 /* End of Packet */ +#define E1000_TXD_CMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */ +#define E1000_TXD_CMD_IC 0x04000000 /* Insert Checksum */ +#define E1000_TXD_CMD_RS 0x08000000 /* Report Status */ +#define E1000_TXD_CMD_RPS 0x10000000 /* Report Packet Sent */ +#define E1000_TXD_CMD_DEXT 0x20000000 /* Desc extension (0 = legacy) */ +#define E1000_TXD_CMD_VLE 0x40000000 /* Add VLAN tag */ +#define E1000_TXD_CMD_IDE 0x80000000 /* Enable Tidv register */ +#define E1000_TXD_STAT_DD 0x00000001 /* Descriptor Done */ +#define E1000_TXD_STAT_EC 0x00000002 /* Excess Collisions */ +#define E1000_TXD_STAT_LC 0x00000004 /* Late Collisions */ +#define E1000_TXD_STAT_TU 0x00000008 /* Transmit underrun */ +#define E1000_TXD_CMD_TCP 0x01000000 /* TCP packet */ +#define E1000_TXD_CMD_IP 0x02000000 /* IP packet */ +#define E1000_TXD_CMD_TSE 0x04000000 /* TCP Seg enable */ +#define E1000_TXD_STAT_TC 0x00000004 /* Tx Underrun */ +#define E1000_TXD_EXTCMD_TSTAMP 0x00000010 /* IEEE1588 Timestamp packet */ + +/* Transmit Control */ +#define E1000_TCTL_EN 0x00000002 /* enable Tx */ +#define E1000_TCTL_PSP 0x00000008 /* pad short packets */ +#define E1000_TCTL_CT 0x00000ff0 /* collision threshold */ +#define E1000_TCTL_COLD 0x003ff000 /* collision distance */ +#define E1000_TCTL_RTLC 0x01000000 /* Re-transmit on late collision */ +#define E1000_TCTL_MULR 0x10000000 /* Multiple request support */ + +/* Transmit Arbitration Count */ +#define E1000_TARC0_ENABLE 0x00000400 /* Enable Tx Queue 0 */ + +/* SerDes Control */ +#define E1000_SCTL_DISABLE_SERDES_LOOPBACK 0x0400 +#define E1000_SCTL_ENABLE_SERDES_LOOPBACK 0x0410 + +/* Receive Checksum Control */ +#define E1000_RXCSUM_IPOFL 0x00000100 /* IPv4 checksum offload */ +#define E1000_RXCSUM_TUOFL 0x00000200 /* TCP / UDP checksum offload */ +#define E1000_RXCSUM_CRCOFL 0x00000800 /* CRC32 offload enable */ +#define E1000_RXCSUM_IPPCSE 0x00001000 /* IP payload checksum enable */ +#define E1000_RXCSUM_PCSD 0x00002000 /* packet checksum disabled */ + +/* Header split receive */ +#define E1000_RFCTL_NFSW_DIS 0x00000040 +#define E1000_RFCTL_NFSR_DIS 0x00000080 +#define E1000_RFCTL_ACK_DIS 0x00001000 +#define E1000_RFCTL_EXTEN 0x00008000 +#define E1000_RFCTL_IPV6_EX_DIS 0x00010000 +#define E1000_RFCTL_NEW_IPV6_EXT_DIS 0x00020000 +#define E1000_RFCTL_LEF 0x00040000 + +/* Collision related configuration parameters */ +#define E1000_COLLISION_THRESHOLD 15 +#define E1000_CT_SHIFT 4 +#define E1000_COLLISION_DISTANCE 63 +#define E1000_COLD_SHIFT 12 + +/* Default values for the transmit IPG register */ +#define DEFAULT_82543_TIPG_IPGT_FIBER 9 +#define DEFAULT_82543_TIPG_IPGT_COPPER 8 + +#define E1000_TIPG_IPGT_MASK 0x000003FF + +#define DEFAULT_82543_TIPG_IPGR1 8 +#define E1000_TIPG_IPGR1_SHIFT 10 + +#define DEFAULT_82543_TIPG_IPGR2 6 +#define DEFAULT_80003ES2LAN_TIPG_IPGR2 7 +#define E1000_TIPG_IPGR2_SHIFT 20 + +/* Ethertype field values */ +#define ETHERNET_IEEE_VLAN_TYPE 0x8100 /* 802.3ac packet */ + +#define ETHERNET_FCS_SIZE 4 +#define MAX_JUMBO_FRAME_SIZE 0x3F00 + +/* Extended Configuration Control and Size */ +#define E1000_EXTCNF_CTRL_MDIO_SW_OWNERSHIP 0x00000020 +#define E1000_EXTCNF_CTRL_LCD_WRITE_ENABLE 0x00000001 +#define E1000_EXTCNF_CTRL_OEM_WRITE_ENABLE 0x00000008 +#define E1000_EXTCNF_CTRL_SWFLAG 0x00000020 +#define E1000_EXTCNF_CTRL_GATE_PHY_CFG 0x00000080 +#define E1000_EXTCNF_SIZE_EXT_PCIE_LENGTH_MASK 0x00FF0000 +#define E1000_EXTCNF_SIZE_EXT_PCIE_LENGTH_SHIFT 16 +#define E1000_EXTCNF_CTRL_EXT_CNF_POINTER_MASK 0x0FFF0000 +#define E1000_EXTCNF_CTRL_EXT_CNF_POINTER_SHIFT 16 + +#define E1000_PHY_CTRL_D0A_LPLU 0x00000002 +#define E1000_PHY_CTRL_NOND0A_LPLU 0x00000004 +#define E1000_PHY_CTRL_NOND0A_GBE_DISABLE 0x00000008 +#define E1000_PHY_CTRL_GBE_DISABLE 0x00000040 + +#define E1000_KABGTXD_BGSQLBIAS 0x00050000 + +/* PBA constants */ +#define E1000_PBA_8K 0x0008 /* 8KB */ +#define E1000_PBA_10K 0x000A /* 10KB */ +#define E1000_PBA_12K 0x000C /* 12KB */ +#define E1000_PBA_14K 0x000E /* 14KB */ +#define E1000_PBA_16K 0x0010 /* 16KB */ +#define E1000_PBA_18K 0x0012 +#define E1000_PBA_20K 0x0014 +#define E1000_PBA_22K 0x0016 +#define E1000_PBA_24K 0x0018 +#define E1000_PBA_26K 0x001A +#define E1000_PBA_30K 0x001E +#define E1000_PBA_32K 0x0020 +#define E1000_PBA_34K 0x0022 +#define E1000_PBA_35K 0x0023 +#define E1000_PBA_38K 0x0026 +#define E1000_PBA_40K 0x0028 +#define E1000_PBA_48K 0x0030 /* 48KB */ +#define E1000_PBA_64K 0x0040 /* 64KB */ + +#define E1000_PBA_RXA_MASK 0xFFFF + +#define E1000_PBS_16K E1000_PBA_16K + +#define IFS_MAX 80 +#define IFS_MIN 40 +#define IFS_RATIO 4 +#define IFS_STEP 10 +#define MIN_NUM_XMITS 1000 + +/* SW Semaphore Register */ +#define E1000_SWSM_SMBI 0x00000001 /* Driver Semaphore bit */ +#define E1000_SWSM_SWESMBI 0x00000002 /* FW Semaphore bit */ +#define E1000_SWSM_DRV_LOAD 0x00000008 /* Driver Loaded Bit */ + +#define E1000_SWSM2_LOCK 0x00000002 /* Secondary driver semaphore bit */ + +/* Interrupt Cause Read */ +#define E1000_ICR_TXDW 0x00000001 /* Transmit desc written back */ +#define E1000_ICR_TXQE 0x00000002 /* Transmit Queue empty */ +#define E1000_ICR_LSC 0x00000004 /* Link Status Change */ +#define E1000_ICR_RXSEQ 0x00000008 /* Rx sequence error */ +#define E1000_ICR_RXDMT0 0x00000010 /* Rx desc min. threshold (0) */ +#define E1000_ICR_RXO 0x00000040 /* Rx overrun */ +#define E1000_ICR_RXT0 0x00000080 /* Rx timer intr (ring 0) */ +#define E1000_ICR_VMMB 0x00000100 /* VM MB event */ +#define E1000_ICR_RXCFG 0x00000400 /* Rx /c/ ordered set */ +#define E1000_ICR_GPI_EN0 0x00000800 /* GP Int 0 */ +#define E1000_ICR_GPI_EN1 0x00001000 /* GP Int 1 */ +#define E1000_ICR_GPI_EN2 0x00002000 /* GP Int 2 */ +#define E1000_ICR_GPI_EN3 0x00004000 /* GP Int 3 */ +#define E1000_ICR_TXD_LOW 0x00008000 +#define E1000_ICR_MNG 0x00040000 /* Manageability event */ +#define E1000_ICR_TS 0x00080000 /* Time Sync Interrupt */ +#define E1000_ICR_DRSTA 0x40000000 /* Device Reset Asserted */ +/* If this bit asserted, the driver should claim the interrupt */ +#define E1000_ICR_INT_ASSERTED 0x80000000 +#define E1000_ICR_DOUTSYNC 0x10000000 /* NIC DMA out of sync */ +#define E1000_ICR_FER 0x00400000 /* Fatal Error */ + +#define E1000_ICR_THS 0x00800000 /* ICR.THS: Thermal Sensor Event*/ +#define E1000_ICR_MDDET 0x10000000 /* Malicious Driver Detect */ + + +/* Extended Interrupt Cause Read */ +#define E1000_EICR_RX_QUEUE0 0x00000001 /* Rx Queue 0 Interrupt */ +#define E1000_EICR_RX_QUEUE1 0x00000002 /* Rx Queue 1 Interrupt */ +#define E1000_EICR_RX_QUEUE2 0x00000004 /* Rx Queue 2 Interrupt */ +#define E1000_EICR_RX_QUEUE3 0x00000008 /* Rx Queue 3 Interrupt */ +#define E1000_EICR_TX_QUEUE0 0x00000100 /* Tx Queue 0 Interrupt */ +#define E1000_EICR_TX_QUEUE1 0x00000200 /* Tx Queue 1 Interrupt */ +#define E1000_EICR_TX_QUEUE2 0x00000400 /* Tx Queue 2 Interrupt */ +#define E1000_EICR_TX_QUEUE3 0x00000800 /* Tx Queue 3 Interrupt */ +#define E1000_EICR_TCP_TIMER 0x40000000 /* TCP Timer */ +#define E1000_EICR_OTHER 0x80000000 /* Interrupt Cause Active */ +/* TCP Timer */ +#define E1000_TCPTIMER_KS 0x00000100 /* KickStart */ +#define E1000_TCPTIMER_COUNT_ENABLE 0x00000200 /* Count Enable */ +#define E1000_TCPTIMER_COUNT_FINISH 0x00000400 /* Count finish */ +#define E1000_TCPTIMER_LOOP 0x00000800 /* Loop */ + +/* This defines the bits that are set in the Interrupt Mask + * Set/Read Register. Each bit is documented below: + * o RXT0 = Receiver Timer Interrupt (ring 0) + * o TXDW = Transmit Descriptor Written Back + * o RXDMT0 = Receive Descriptor Minimum Threshold hit (ring 0) + * o RXSEQ = Receive Sequence Error + * o LSC = Link Status Change + */ +#define IMS_ENABLE_MASK ( \ + E1000_IMS_RXT0 | \ + E1000_IMS_TXDW | \ + E1000_IMS_RXDMT0 | \ + E1000_IMS_RXSEQ | \ + E1000_IMS_LSC) + +/* Interrupt Mask Set */ +#define E1000_IMS_TXDW E1000_ICR_TXDW /* Tx desc written back */ +#define E1000_IMS_TXQE E1000_ICR_TXQE /* Transmit Queue empty */ +#define E1000_IMS_LSC E1000_ICR_LSC /* Link Status Change */ +#define E1000_IMS_VMMB E1000_ICR_VMMB /* Mail box activity */ +#define E1000_IMS_RXSEQ E1000_ICR_RXSEQ /* Rx sequence error */ +#define E1000_IMS_RXDMT0 E1000_ICR_RXDMT0 /* Rx desc min. threshold */ +#define E1000_IMS_RXO E1000_ICR_RXO /* Rx overrun */ +#define E1000_IMS_RXT0 E1000_ICR_RXT0 /* Rx timer intr */ +#define E1000_IMS_TXD_LOW E1000_ICR_TXD_LOW +#define E1000_IMS_TS E1000_ICR_TS /* Time Sync Interrupt */ +#define E1000_IMS_DRSTA E1000_ICR_DRSTA /* Device Reset Asserted */ +#define E1000_IMS_DOUTSYNC E1000_ICR_DOUTSYNC /* NIC DMA out of sync */ +#define E1000_IMS_FER E1000_ICR_FER /* Fatal Error */ + +#define E1000_IMS_THS E1000_ICR_THS /* ICR.TS: Thermal Sensor Event*/ +#define E1000_IMS_MDDET E1000_ICR_MDDET /* Malicious Driver Detect */ +/* Extended Interrupt Mask Set */ +#define E1000_EIMS_RX_QUEUE0 E1000_EICR_RX_QUEUE0 /* Rx Queue 0 Interrupt */ +#define E1000_EIMS_RX_QUEUE1 E1000_EICR_RX_QUEUE1 /* Rx Queue 1 Interrupt */ +#define E1000_EIMS_RX_QUEUE2 E1000_EICR_RX_QUEUE2 /* Rx Queue 2 Interrupt */ +#define E1000_EIMS_RX_QUEUE3 E1000_EICR_RX_QUEUE3 /* Rx Queue 3 Interrupt */ +#define E1000_EIMS_TX_QUEUE0 E1000_EICR_TX_QUEUE0 /* Tx Queue 0 Interrupt */ +#define E1000_EIMS_TX_QUEUE1 E1000_EICR_TX_QUEUE1 /* Tx Queue 1 Interrupt */ +#define E1000_EIMS_TX_QUEUE2 E1000_EICR_TX_QUEUE2 /* Tx Queue 2 Interrupt */ +#define E1000_EIMS_TX_QUEUE3 E1000_EICR_TX_QUEUE3 /* Tx Queue 3 Interrupt */ +#define E1000_EIMS_TCP_TIMER E1000_EICR_TCP_TIMER /* TCP Timer */ +#define E1000_EIMS_OTHER E1000_EICR_OTHER /* Interrupt Cause Active */ + +/* Interrupt Cause Set */ +#define E1000_ICS_LSC E1000_ICR_LSC /* Link Status Change */ +#define E1000_ICS_RXSEQ E1000_ICR_RXSEQ /* Rx sequence error */ +#define E1000_ICS_RXDMT0 E1000_ICR_RXDMT0 /* Rx desc min. threshold */ + +/* Extended Interrupt Cause Set */ +#define E1000_EICS_RX_QUEUE0 E1000_EICR_RX_QUEUE0 /* Rx Queue 0 Interrupt */ +#define E1000_EICS_RX_QUEUE1 E1000_EICR_RX_QUEUE1 /* Rx Queue 1 Interrupt */ +#define E1000_EICS_RX_QUEUE2 E1000_EICR_RX_QUEUE2 /* Rx Queue 2 Interrupt */ +#define E1000_EICS_RX_QUEUE3 E1000_EICR_RX_QUEUE3 /* Rx Queue 3 Interrupt */ +#define E1000_EICS_TX_QUEUE0 E1000_EICR_TX_QUEUE0 /* Tx Queue 0 Interrupt */ +#define E1000_EICS_TX_QUEUE1 E1000_EICR_TX_QUEUE1 /* Tx Queue 1 Interrupt */ +#define E1000_EICS_TX_QUEUE2 E1000_EICR_TX_QUEUE2 /* Tx Queue 2 Interrupt */ +#define E1000_EICS_TX_QUEUE3 E1000_EICR_TX_QUEUE3 /* Tx Queue 3 Interrupt */ +#define E1000_EICS_TCP_TIMER E1000_EICR_TCP_TIMER /* TCP Timer */ +#define E1000_EICS_OTHER E1000_EICR_OTHER /* Interrupt Cause Active */ + +#define E1000_EITR_ITR_INT_MASK 0x0000FFFF +/* E1000_EITR_CNT_IGNR is only for 82576 and newer */ +#define E1000_EITR_CNT_IGNR 0x80000000 /* Don't reset counters on write */ +#define E1000_EITR_INTERVAL 0x00007FFC + +/* Transmit Descriptor Control */ +#define E1000_TXDCTL_PTHRESH 0x0000003F /* TXDCTL Prefetch Threshold */ +#define E1000_TXDCTL_HTHRESH 0x00003F00 /* TXDCTL Host Threshold */ +#define E1000_TXDCTL_WTHRESH 0x003F0000 /* TXDCTL Writeback Threshold */ +#define E1000_TXDCTL_GRAN 0x01000000 /* TXDCTL Granularity */ +#define E1000_TXDCTL_FULL_TX_DESC_WB 0x01010000 /* GRAN=1, WTHRESH=1 */ +#define E1000_TXDCTL_MAX_TX_DESC_PREFETCH 0x0100001F /* GRAN=1, PTHRESH=31 */ +/* Enable the counting of descriptors still to be processed. */ +#define E1000_TXDCTL_COUNT_DESC 0x00400000 + +/* Flow Control Constants */ +#define FLOW_CONTROL_ADDRESS_LOW 0x00C28001 +#define FLOW_CONTROL_ADDRESS_HIGH 0x00000100 +#define FLOW_CONTROL_TYPE 0x8808 + +/* 802.1q VLAN Packet Size */ +#define VLAN_TAG_SIZE 4 /* 802.3ac tag (not DMA'd) */ +#define E1000_VLAN_FILTER_TBL_SIZE 128 /* VLAN Filter Table (4096 bits) */ + +/* Receive Address + * Number of high/low register pairs in the RAR. The RAR (Receive Address + * Registers) holds the directed and multicast addresses that we monitor. + * Technically, we have 16 spots. However, we reserve one of these spots + * (RAR[15]) for our directed address used by controllers with + * manageability enabled, allowing us room for 15 multicast addresses. + */ +#define E1000_RAR_ENTRIES 15 +#define E1000_RAH_AV 0x80000000 /* Receive descriptor valid */ +#define E1000_RAL_MAC_ADDR_LEN 4 +#define E1000_RAH_MAC_ADDR_LEN 2 +#define E1000_RAH_QUEUE_MASK_82575 0x000C0000 +#define E1000_RAH_POOL_1 0x00040000 + +/* Error Codes */ +#define E1000_SUCCESS 0 +#define E1000_ERR_NVM 1 +#define E1000_ERR_PHY 2 +#define E1000_ERR_CONFIG 3 +#define E1000_ERR_PARAM 4 +#define E1000_ERR_MAC_INIT 5 +#define E1000_ERR_PHY_TYPE 6 +#define E1000_ERR_RESET 9 +#define E1000_ERR_MASTER_REQUESTS_PENDING 10 +#define E1000_ERR_HOST_INTERFACE_COMMAND 11 +#define E1000_BLK_PHY_RESET 12 +#define E1000_ERR_SWFW_SYNC 13 +#define E1000_NOT_IMPLEMENTED 14 +#define E1000_ERR_MBX 15 +#define E1000_ERR_INVALID_ARGUMENT 16 +#define E1000_ERR_NO_SPACE 17 +#define E1000_ERR_NVM_PBA_SECTION 18 +#define E1000_ERR_I2C 19 +#define E1000_ERR_INVM_VALUE_NOT_FOUND 20 + +/* Loop limit on how long we wait for auto-negotiation to complete */ +#define FIBER_LINK_UP_LIMIT 50 +#define COPPER_LINK_UP_LIMIT 10 +#define PHY_AUTO_NEG_LIMIT 45 +#define PHY_FORCE_LIMIT 20 +/* Number of 100 microseconds we wait for PCI Express master disable */ +#define MASTER_DISABLE_TIMEOUT 800 +/* Number of milliseconds we wait for PHY configuration done after MAC reset */ +#define PHY_CFG_TIMEOUT 100 +/* Number of 2 milliseconds we wait for acquiring MDIO ownership. */ +#define MDIO_OWNERSHIP_TIMEOUT 10 +/* Number of milliseconds for NVM auto read done after MAC reset. */ +#define AUTO_READ_DONE_TIMEOUT 10 + +/* Flow Control */ +#define E1000_FCRTH_RTH 0x0000FFF8 /* Mask Bits[15:3] for RTH */ +#define E1000_FCRTL_RTL 0x0000FFF8 /* Mask Bits[15:3] for RTL */ +#define E1000_FCRTL_XONE 0x80000000 /* Enable XON frame transmission */ + +/* Transmit Configuration Word */ +#define E1000_TXCW_FD 0x00000020 /* TXCW full duplex */ +#define E1000_TXCW_PAUSE 0x00000080 /* TXCW sym pause request */ +#define E1000_TXCW_ASM_DIR 0x00000100 /* TXCW astm pause direction */ +#define E1000_TXCW_PAUSE_MASK 0x00000180 /* TXCW pause request mask */ +#define E1000_TXCW_ANE 0x80000000 /* Auto-neg enable */ + +/* Receive Configuration Word */ +#define E1000_RXCW_CW 0x0000ffff /* RxConfigWord mask */ +#define E1000_RXCW_IV 0x08000000 /* Receive config invalid */ +#define E1000_RXCW_C 0x20000000 /* Receive config */ +#define E1000_RXCW_SYNCH 0x40000000 /* Receive config synch */ + +#define E1000_TSYNCTXCTL_VALID 0x00000001 /* Tx timestamp valid */ +#define E1000_TSYNCTXCTL_ENABLED 0x00000010 /* enable Tx timestamping */ + +#define E1000_TSYNCRXCTL_VALID 0x00000001 /* Rx timestamp valid */ +#define E1000_TSYNCRXCTL_TYPE_MASK 0x0000000E /* Rx type mask */ +#define E1000_TSYNCRXCTL_TYPE_L2_V2 0x00 +#define E1000_TSYNCRXCTL_TYPE_L4_V1 0x02 +#define E1000_TSYNCRXCTL_TYPE_L2_L4_V2 0x04 +#define E1000_TSYNCRXCTL_TYPE_ALL 0x08 +#define E1000_TSYNCRXCTL_TYPE_EVENT_V2 0x0A +#define E1000_TSYNCRXCTL_ENABLED 0x00000010 /* enable Rx timestamping */ +#define E1000_TSYNCRXCTL_SYSCFI 0x00000020 /* Sys clock frequency */ + +#define E1000_TSYNCRXCFG_PTP_V1_CTRLT_MASK 0x000000FF +#define E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE 0x00 +#define E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE 0x01 +#define E1000_TSYNCRXCFG_PTP_V1_FOLLOWUP_MESSAGE 0x02 +#define E1000_TSYNCRXCFG_PTP_V1_DELAY_RESP_MESSAGE 0x03 +#define E1000_TSYNCRXCFG_PTP_V1_MANAGEMENT_MESSAGE 0x04 + +#define E1000_TSYNCRXCFG_PTP_V2_MSGID_MASK 0x00000F00 +#define E1000_TSYNCRXCFG_PTP_V2_SYNC_MESSAGE 0x0000 +#define E1000_TSYNCRXCFG_PTP_V2_DELAY_REQ_MESSAGE 0x0100 +#define E1000_TSYNCRXCFG_PTP_V2_PATH_DELAY_REQ_MESSAGE 0x0200 +#define E1000_TSYNCRXCFG_PTP_V2_PATH_DELAY_RESP_MESSAGE 0x0300 +#define E1000_TSYNCRXCFG_PTP_V2_FOLLOWUP_MESSAGE 0x0800 +#define E1000_TSYNCRXCFG_PTP_V2_DELAY_RESP_MESSAGE 0x0900 +#define E1000_TSYNCRXCFG_PTP_V2_PATH_DELAY_FOLLOWUP_MESSAGE 0x0A00 +#define E1000_TSYNCRXCFG_PTP_V2_ANNOUNCE_MESSAGE 0x0B00 +#define E1000_TSYNCRXCFG_PTP_V2_SIGNALLING_MESSAGE 0x0C00 +#define E1000_TSYNCRXCFG_PTP_V2_MANAGEMENT_MESSAGE 0x0D00 + +#define E1000_TIMINCA_16NS_SHIFT 24 +#define E1000_TIMINCA_INCPERIOD_SHIFT 24 +#define E1000_TIMINCA_INCVALUE_MASK 0x00FFFFFF + +#define E1000_TSICR_TXTS 0x00000002 +#define E1000_TSIM_TXTS 0x00000002 +/* TUPLE Filtering Configuration */ +#define E1000_TTQF_DISABLE_MASK 0xF0008000 /* TTQF Disable Mask */ +#define E1000_TTQF_QUEUE_ENABLE 0x100 /* TTQF Queue Enable Bit */ +#define E1000_TTQF_PROTOCOL_MASK 0xFF /* TTQF Protocol Mask */ +/* TTQF TCP Bit, shift with E1000_TTQF_PROTOCOL SHIFT */ +#define E1000_TTQF_PROTOCOL_TCP 0x0 +/* TTQF UDP Bit, shift with E1000_TTQF_PROTOCOL_SHIFT */ +#define E1000_TTQF_PROTOCOL_UDP 0x1 +/* TTQF SCTP Bit, shift with E1000_TTQF_PROTOCOL_SHIFT */ +#define E1000_TTQF_PROTOCOL_SCTP 0x2 +#define E1000_TTQF_PROTOCOL_SHIFT 5 /* TTQF Protocol Shift */ +#define E1000_TTQF_QUEUE_SHIFT 16 /* TTQF Queue Shfit */ +#define E1000_TTQF_RX_QUEUE_MASK 0x70000 /* TTQF Queue Mask */ +#define E1000_TTQF_MASK_ENABLE 0x10000000 /* TTQF Mask Enable Bit */ +#define E1000_IMIR_CLEAR_MASK 0xF001FFFF /* IMIR Reg Clear Mask */ +#define E1000_IMIR_PORT_BYPASS 0x20000 /* IMIR Port Bypass Bit */ +#define E1000_IMIR_PRIORITY_SHIFT 29 /* IMIR Priority Shift */ +#define E1000_IMIREXT_CLEAR_MASK 0x7FFFF /* IMIREXT Reg Clear Mask */ + +#define E1000_MDICNFG_EXT_MDIO 0x80000000 /* MDI ext/int destination */ +#define E1000_MDICNFG_COM_MDIO 0x40000000 /* MDI shared w/ lan 0 */ +#define E1000_MDICNFG_PHY_MASK 0x03E00000 +#define E1000_MDICNFG_PHY_SHIFT 21 + +#define E1000_MEDIA_PORT_COPPER 1 +#define E1000_MEDIA_PORT_OTHER 2 +#define E1000_M88E1112_AUTO_COPPER_SGMII 0x2 +#define E1000_M88E1112_AUTO_COPPER_BASEX 0x3 +#define E1000_M88E1112_STATUS_LINK 0x0004 /* Interface Link Bit */ +#define E1000_M88E1112_MAC_CTRL_1 0x10 +#define E1000_M88E1112_MAC_CTRL_1_MODE_MASK 0x0380 /* Mode Select */ +#define E1000_M88E1112_MAC_CTRL_1_MODE_SHIFT 7 +#define E1000_M88E1112_PAGE_ADDR 0x16 +#define E1000_M88E1112_STATUS 0x01 + +#define E1000_THSTAT_LOW_EVENT 0x20000000 /* Low thermal threshold */ +#define E1000_THSTAT_MID_EVENT 0x00200000 /* Mid thermal threshold */ +#define E1000_THSTAT_HIGH_EVENT 0x00002000 /* High thermal threshold */ +#define E1000_THSTAT_PWR_DOWN 0x00000001 /* Power Down Event */ +#define E1000_THSTAT_LINK_THROTTLE 0x00000002 /* Link Spd Throttle Event */ + +/* I350 EEE defines */ +#define E1000_IPCNFG_EEE_1G_AN 0x00000008 /* IPCNFG EEE Ena 1G AN */ +#define E1000_IPCNFG_EEE_100M_AN 0x00000004 /* IPCNFG EEE Ena 100M AN */ +#define E1000_EEER_TX_LPI_EN 0x00010000 /* EEER Tx LPI Enable */ +#define E1000_EEER_RX_LPI_EN 0x00020000 /* EEER Rx LPI Enable */ +#define E1000_EEER_LPI_FC 0x00040000 /* EEER Ena on Flow Cntrl */ +/* EEE status */ +#define E1000_EEER_EEE_NEG 0x20000000 /* EEE capability nego */ +#define E1000_EEER_RX_LPI_STATUS 0x40000000 /* Rx in LPI state */ +#define E1000_EEER_TX_LPI_STATUS 0x80000000 /* Tx in LPI state */ +#define E1000_EEE_LP_ADV_ADDR_I350 0x040F /* EEE LP Advertisement */ +#define E1000_M88E1543_PAGE_ADDR 0x16 /* Page Offset Register */ +#define E1000_M88E1543_EEE_CTRL_1 0x0 +#define E1000_M88E1543_EEE_CTRL_1_MS 0x0001 /* EEE Master/Slave */ +#define E1000_EEE_ADV_DEV_I354 7 +#define E1000_EEE_ADV_ADDR_I354 60 +#define E1000_EEE_ADV_100_SUPPORTED (1 << 1) /* 100BaseTx EEE Supported */ +#define E1000_EEE_ADV_1000_SUPPORTED (1 << 2) /* 1000BaseT EEE Supported */ +#define E1000_PCS_STATUS_DEV_I354 3 +#define E1000_PCS_STATUS_ADDR_I354 1 +#define E1000_PCS_STATUS_RX_LPI_RCVD 0x0400 +#define E1000_PCS_STATUS_TX_LPI_RCVD 0x0800 +#define E1000_EEE_SU_LPI_CLK_STP 0x00800000 /* EEE LPI Clock Stop */ +#define E1000_EEE_LP_ADV_DEV_I210 7 /* EEE LP Adv Device */ +#define E1000_EEE_LP_ADV_ADDR_I210 61 /* EEE LP Adv Register */ +/* PCI Express Control */ +#define E1000_GCR_RXD_NO_SNOOP 0x00000001 +#define E1000_GCR_RXDSCW_NO_SNOOP 0x00000002 +#define E1000_GCR_RXDSCR_NO_SNOOP 0x00000004 +#define E1000_GCR_TXD_NO_SNOOP 0x00000008 +#define E1000_GCR_TXDSCW_NO_SNOOP 0x00000010 +#define E1000_GCR_TXDSCR_NO_SNOOP 0x00000020 +#define E1000_GCR_CMPL_TMOUT_MASK 0x0000F000 +#define E1000_GCR_CMPL_TMOUT_10ms 0x00001000 +#define E1000_GCR_CMPL_TMOUT_RESEND 0x00010000 +#define E1000_GCR_CAP_VER2 0x00040000 + +#define PCIE_NO_SNOOP_ALL (E1000_GCR_RXD_NO_SNOOP | \ + E1000_GCR_RXDSCW_NO_SNOOP | \ + E1000_GCR_RXDSCR_NO_SNOOP | \ + E1000_GCR_TXD_NO_SNOOP | \ + E1000_GCR_TXDSCW_NO_SNOOP | \ + E1000_GCR_TXDSCR_NO_SNOOP) + +#define E1000_MMDAC_FUNC_DATA 0x4000 /* Data, no post increment */ + +/* mPHY address control and data registers */ +#define E1000_MPHY_ADDR_CTL 0x0024 /* Address Control Reg */ +#define E1000_MPHY_ADDR_CTL_OFFSET_MASK 0xFFFF0000 +#define E1000_MPHY_DATA 0x0E10 /* Data Register */ + +/* AFE CSR Offset for PCS CLK */ +#define E1000_MPHY_PCS_CLK_REG_OFFSET 0x0004 +/* Override for near end digital loopback. */ +#define E1000_MPHY_PCS_CLK_REG_DIGINELBEN 0x10 + +/* PHY Control Register */ +#define MII_CR_SPEED_SELECT_MSB 0x0040 /* bits 6,13: 10=1000, 01=100, 00=10 */ +#define MII_CR_COLL_TEST_ENABLE 0x0080 /* Collision test enable */ +#define MII_CR_FULL_DUPLEX 0x0100 /* FDX =1, half duplex =0 */ +#define MII_CR_RESTART_AUTO_NEG 0x0200 /* Restart auto negotiation */ +#define MII_CR_ISOLATE 0x0400 /* Isolate PHY from MII */ +#define MII_CR_POWER_DOWN 0x0800 /* Power down */ +#define MII_CR_AUTO_NEG_EN 0x1000 /* Auto Neg Enable */ +#define MII_CR_SPEED_SELECT_LSB 0x2000 /* bits 6,13: 10=1000, 01=100, 00=10 */ +#define MII_CR_LOOPBACK 0x4000 /* 0 = normal, 1 = loopback */ +#define MII_CR_RESET 0x8000 /* 0 = normal, 1 = PHY reset */ +#define MII_CR_SPEED_1000 0x0040 +#define MII_CR_SPEED_100 0x2000 +#define MII_CR_SPEED_10 0x0000 + +/* PHY Status Register */ +#define MII_SR_EXTENDED_CAPS 0x0001 /* Extended register capabilities */ +#define MII_SR_JABBER_DETECT 0x0002 /* Jabber Detected */ +#define MII_SR_LINK_STATUS 0x0004 /* Link Status 1 = link */ +#define MII_SR_AUTONEG_CAPS 0x0008 /* Auto Neg Capable */ +#define MII_SR_REMOTE_FAULT 0x0010 /* Remote Fault Detect */ +#define MII_SR_AUTONEG_COMPLETE 0x0020 /* Auto Neg Complete */ +#define MII_SR_PREAMBLE_SUPPRESS 0x0040 /* Preamble may be suppressed */ +#define MII_SR_EXTENDED_STATUS 0x0100 /* Ext. status info in Reg 0x0F */ +#define MII_SR_100T2_HD_CAPS 0x0200 /* 100T2 Half Duplex Capable */ +#define MII_SR_100T2_FD_CAPS 0x0400 /* 100T2 Full Duplex Capable */ +#define MII_SR_10T_HD_CAPS 0x0800 /* 10T Half Duplex Capable */ +#define MII_SR_10T_FD_CAPS 0x1000 /* 10T Full Duplex Capable */ +#define MII_SR_100X_HD_CAPS 0x2000 /* 100X Half Duplex Capable */ +#define MII_SR_100X_FD_CAPS 0x4000 /* 100X Full Duplex Capable */ +#define MII_SR_100T4_CAPS 0x8000 /* 100T4 Capable */ + +/* Autoneg Advertisement Register */ +#define NWAY_AR_SELECTOR_FIELD 0x0001 /* indicates IEEE 802.3 CSMA/CD */ +#define NWAY_AR_10T_HD_CAPS 0x0020 /* 10T Half Duplex Capable */ +#define NWAY_AR_10T_FD_CAPS 0x0040 /* 10T Full Duplex Capable */ +#define NWAY_AR_100TX_HD_CAPS 0x0080 /* 100TX Half Duplex Capable */ +#define NWAY_AR_100TX_FD_CAPS 0x0100 /* 100TX Full Duplex Capable */ +#define NWAY_AR_100T4_CAPS 0x0200 /* 100T4 Capable */ +#define NWAY_AR_PAUSE 0x0400 /* Pause operation desired */ +#define NWAY_AR_ASM_DIR 0x0800 /* Asymmetric Pause Direction bit */ +#define NWAY_AR_REMOTE_FAULT 0x2000 /* Remote Fault detected */ +#define NWAY_AR_NEXT_PAGE 0x8000 /* Next Page ability supported */ + +/* Link Partner Ability Register (Base Page) */ +#define NWAY_LPAR_SELECTOR_FIELD 0x0000 /* LP protocol selector field */ +#define NWAY_LPAR_10T_HD_CAPS 0x0020 /* LP 10T Half Dplx Capable */ +#define NWAY_LPAR_10T_FD_CAPS 0x0040 /* LP 10T Full Dplx Capable */ +#define NWAY_LPAR_100TX_HD_CAPS 0x0080 /* LP 100TX Half Dplx Capable */ +#define NWAY_LPAR_100TX_FD_CAPS 0x0100 /* LP 100TX Full Dplx Capable */ +#define NWAY_LPAR_100T4_CAPS 0x0200 /* LP is 100T4 Capable */ +#define NWAY_LPAR_PAUSE 0x0400 /* LP Pause operation desired */ +#define NWAY_LPAR_ASM_DIR 0x0800 /* LP Asym Pause Direction bit */ +#define NWAY_LPAR_REMOTE_FAULT 0x2000 /* LP detected Remote Fault */ +#define NWAY_LPAR_ACKNOWLEDGE 0x4000 /* LP rx'd link code word */ +#define NWAY_LPAR_NEXT_PAGE 0x8000 /* Next Page ability supported */ + +/* Autoneg Expansion Register */ +#define NWAY_ER_LP_NWAY_CAPS 0x0001 /* LP has Auto Neg Capability */ +#define NWAY_ER_PAGE_RXD 0x0002 /* LP 10T Half Dplx Capable */ +#define NWAY_ER_NEXT_PAGE_CAPS 0x0004 /* LP 10T Full Dplx Capable */ +#define NWAY_ER_LP_NEXT_PAGE_CAPS 0x0008 /* LP 100TX Half Dplx Capable */ +#define NWAY_ER_PAR_DETECT_FAULT 0x0010 /* LP 100TX Full Dplx Capable */ + +/* 1000BASE-T Control Register */ +#define CR_1000T_ASYM_PAUSE 0x0080 /* Advertise asymmetric pause bit */ +#define CR_1000T_HD_CAPS 0x0100 /* Advertise 1000T HD capability */ +#define CR_1000T_FD_CAPS 0x0200 /* Advertise 1000T FD capability */ +/* 1=Repeater/switch device port 0=DTE device */ +#define CR_1000T_REPEATER_DTE 0x0400 +/* 1=Configure PHY as Master 0=Configure PHY as Slave */ +#define CR_1000T_MS_VALUE 0x0800 +/* 1=Master/Slave manual config value 0=Automatic Master/Slave config */ +#define CR_1000T_MS_ENABLE 0x1000 +#define CR_1000T_TEST_MODE_NORMAL 0x0000 /* Normal Operation */ +#define CR_1000T_TEST_MODE_1 0x2000 /* Transmit Waveform test */ +#define CR_1000T_TEST_MODE_2 0x4000 /* Master Transmit Jitter test */ +#define CR_1000T_TEST_MODE_3 0x6000 /* Slave Transmit Jitter test */ +#define CR_1000T_TEST_MODE_4 0x8000 /* Transmitter Distortion test */ + +/* 1000BASE-T Status Register */ +#define SR_1000T_IDLE_ERROR_CNT 0x00FF /* Num idle err since last rd */ +#define SR_1000T_ASYM_PAUSE_DIR 0x0100 /* LP asym pause direction bit */ +#define SR_1000T_LP_HD_CAPS 0x0400 /* LP is 1000T HD capable */ +#define SR_1000T_LP_FD_CAPS 0x0800 /* LP is 1000T FD capable */ +#define SR_1000T_REMOTE_RX_STATUS 0x1000 /* Remote receiver OK */ +#define SR_1000T_LOCAL_RX_STATUS 0x2000 /* Local receiver OK */ +#define SR_1000T_MS_CONFIG_RES 0x4000 /* 1=Local Tx Master, 0=Slave */ +#define SR_1000T_MS_CONFIG_FAULT 0x8000 /* Master/Slave config fault */ + +#define SR_1000T_PHY_EXCESSIVE_IDLE_ERR_COUNT 5 + +/* PHY 1000 MII Register/Bit Definitions */ +/* PHY Registers defined by IEEE */ +#define PHY_CONTROL 0x00 /* Control Register */ +#define PHY_STATUS 0x01 /* Status Register */ +#define PHY_ID1 0x02 /* Phy Id Reg (word 1) */ +#define PHY_ID2 0x03 /* Phy Id Reg (word 2) */ +#define PHY_AUTONEG_ADV 0x04 /* Autoneg Advertisement */ +#define PHY_LP_ABILITY 0x05 /* Link Partner Ability (Base Page) */ +#define PHY_AUTONEG_EXP 0x06 /* Autoneg Expansion Reg */ +#define PHY_NEXT_PAGE_TX 0x07 /* Next Page Tx */ +#define PHY_LP_NEXT_PAGE 0x08 /* Link Partner Next Page */ +#define PHY_1000T_CTRL 0x09 /* 1000Base-T Control Reg */ +#define PHY_1000T_STATUS 0x0A /* 1000Base-T Status Reg */ +#define PHY_EXT_STATUS 0x0F /* Extended Status Reg */ + +#define PHY_CONTROL_LB 0x4000 /* PHY Loopback bit */ + +/* NVM Control */ +#define E1000_EECD_SK 0x00000001 /* NVM Clock */ +#define E1000_EECD_CS 0x00000002 /* NVM Chip Select */ +#define E1000_EECD_DI 0x00000004 /* NVM Data In */ +#define E1000_EECD_DO 0x00000008 /* NVM Data Out */ +#define E1000_EECD_REQ 0x00000040 /* NVM Access Request */ +#define E1000_EECD_GNT 0x00000080 /* NVM Access Grant */ +#define E1000_EECD_PRES 0x00000100 /* NVM Present */ +#define E1000_EECD_SIZE 0x00000200 /* NVM Size (0=64 word 1=256 word) */ +#define E1000_EECD_BLOCKED 0x00008000 /* Bit banging access blocked flag */ +#define E1000_EECD_ABORT 0x00010000 /* NVM operation aborted flag */ +#define E1000_EECD_TIMEOUT 0x00020000 /* NVM read operation timeout flag */ +#define E1000_EECD_ERROR_CLR 0x00040000 /* NVM error status clear bit */ +/* NVM Addressing bits based on type 0=small, 1=large */ +#define E1000_EECD_ADDR_BITS 0x00000400 +#define E1000_NVM_GRANT_ATTEMPTS 1000 /* NVM # attempts to gain grant */ +#define E1000_EECD_AUTO_RD 0x00000200 /* NVM Auto Read done */ +#define E1000_EECD_SIZE_EX_MASK 0x00007800 /* NVM Size */ +#define E1000_EECD_SIZE_EX_SHIFT 11 +#define E1000_EECD_FLUPD 0x00080000 /* Update FLASH */ +#define E1000_EECD_AUPDEN 0x00100000 /* Ena Auto FLASH update */ +#define E1000_EECD_SEC1VAL 0x00400000 /* Sector One Valid */ +#define E1000_EECD_SEC1VAL_VALID_MASK (E1000_EECD_AUTO_RD | E1000_EECD_PRES) +#define E1000_EECD_FLUPD_I210 0x00800000 /* Update FLASH */ +#define E1000_EECD_FLUDONE_I210 0x04000000 /* Update FLASH done */ +#define E1000_EECD_FLASH_DETECTED_I210 0x00080000 /* FLASH detected */ +#define E1000_EECD_SEC1VAL_I210 0x02000000 /* Sector One Valid */ +#define E1000_FLUDONE_ATTEMPTS 20000 +#define E1000_EERD_EEWR_MAX_COUNT 512 /* buffered EEPROM words rw */ +#define E1000_I210_FIFO_SEL_RX 0x00 +#define E1000_I210_FIFO_SEL_TX_QAV(_i) (0x02 + (_i)) +#define E1000_I210_FIFO_SEL_TX_LEGACY E1000_I210_FIFO_SEL_TX_QAV(0) +#define E1000_I210_FIFO_SEL_BMC2OS_TX 0x06 +#define E1000_I210_FIFO_SEL_BMC2OS_RX 0x01 + +#define E1000_I210_FLASH_SECTOR_SIZE 0x1000 /* 4KB FLASH sector unit size */ +/* Secure FLASH mode requires removing MSb */ +#define E1000_I210_FW_PTR_MASK 0x7FFF +/* Firmware code revision field word offset*/ +#define E1000_I210_FW_VER_OFFSET 328 + +#define E1000_NVM_RW_REG_DATA 16 /* Offset to data in NVM read/write regs */ +#define E1000_NVM_RW_REG_DONE 2 /* Offset to READ/WRITE done bit */ +#define E1000_NVM_RW_REG_START 1 /* Start operation */ +#define E1000_NVM_RW_ADDR_SHIFT 2 /* Shift to the address bits */ +#define E1000_NVM_POLL_WRITE 1 /* Flag for polling for write complete */ +#define E1000_NVM_POLL_READ 0 /* Flag for polling for read complete */ +#define E1000_FLASH_UPDATES 2000 + +/* NVM Word Offsets */ +#define NVM_COMPAT 0x0003 +#define NVM_ID_LED_SETTINGS 0x0004 +#define NVM_VERSION 0x0005 +#define E1000_I210_NVM_FW_MODULE_PTR 0x0010 +#define E1000_I350_NVM_FW_MODULE_PTR 0x0051 +#define NVM_FUTURE_INIT_WORD1 0x0019 +#define NVM_ETRACK_WORD 0x0042 +#define NVM_ETRACK_HIWORD 0x0043 +#define NVM_COMB_VER_OFF 0x0083 +#define NVM_COMB_VER_PTR 0x003d + +/* NVM version defines */ +#define NVM_MAJOR_MASK 0xF000 +#define NVM_MINOR_MASK 0x0FF0 +#define NVM_IMAGE_ID_MASK 0x000F +#define NVM_COMB_VER_MASK 0x00FF +#define NVM_MAJOR_SHIFT 12 +#define NVM_MINOR_SHIFT 4 +#define NVM_COMB_VER_SHFT 8 +#define NVM_VER_INVALID 0xFFFF +#define NVM_ETRACK_SHIFT 16 +#define NVM_ETRACK_VALID 0x8000 +#define NVM_NEW_DEC_MASK 0x0F00 +#define NVM_HEX_CONV 16 +#define NVM_HEX_TENS 10 + +/* FW version defines */ +/* Offset of "Loader patch ptr" in Firmware Header */ +#define E1000_I350_NVM_FW_LOADER_PATCH_PTR_OFFSET 0x01 +/* Patch generation hour & minutes */ +#define E1000_I350_NVM_FW_VER_WORD1_OFFSET 0x04 +/* Patch generation month & day */ +#define E1000_I350_NVM_FW_VER_WORD2_OFFSET 0x05 +/* Patch generation year */ +#define E1000_I350_NVM_FW_VER_WORD3_OFFSET 0x06 +/* Patch major & minor numbers */ +#define E1000_I350_NVM_FW_VER_WORD4_OFFSET 0x07 + +#define NVM_MAC_ADDR 0x0000 +#define NVM_SUB_DEV_ID 0x000B +#define NVM_SUB_VEN_ID 0x000C +#define NVM_DEV_ID 0x000D +#define NVM_VEN_ID 0x000E +#define NVM_INIT_CTRL_2 0x000F +#define NVM_INIT_CTRL_4 0x0013 +#define NVM_LED_1_CFG 0x001C +#define NVM_LED_0_2_CFG 0x001F + +#define NVM_COMPAT_VALID_CSUM 0x0001 +#define NVM_FUTURE_INIT_WORD1_VALID_CSUM 0x0040 + +#define NVM_ETS_CFG 0x003E +#define NVM_ETS_LTHRES_DELTA_MASK 0x07C0 +#define NVM_ETS_LTHRES_DELTA_SHIFT 6 +#define NVM_ETS_TYPE_MASK 0x0038 +#define NVM_ETS_TYPE_SHIFT 3 +#define NVM_ETS_TYPE_EMC 0x000 +#define NVM_ETS_NUM_SENSORS_MASK 0x0007 +#define NVM_ETS_DATA_LOC_MASK 0x3C00 +#define NVM_ETS_DATA_LOC_SHIFT 10 +#define NVM_ETS_DATA_INDEX_MASK 0x0300 +#define NVM_ETS_DATA_INDEX_SHIFT 8 +#define NVM_ETS_DATA_HTHRESH_MASK 0x00FF +#define NVM_INIT_CONTROL2_REG 0x000F +#define NVM_INIT_CONTROL3_PORT_B 0x0014 +#define NVM_INIT_3GIO_3 0x001A +#define NVM_SWDEF_PINS_CTRL_PORT_0 0x0020 +#define NVM_INIT_CONTROL3_PORT_A 0x0024 +#define NVM_CFG 0x0012 +#define NVM_ALT_MAC_ADDR_PTR 0x0037 +#define NVM_CHECKSUM_REG 0x003F +#define NVM_COMPATIBILITY_REG_3 0x0003 +#define NVM_COMPATIBILITY_BIT_MASK 0x8000 + +#define E1000_NVM_CFG_DONE_PORT_0 0x040000 /* MNG config cycle done */ +#define E1000_NVM_CFG_DONE_PORT_1 0x080000 /* ...for second port */ +#define E1000_NVM_CFG_DONE_PORT_2 0x100000 /* ...for third port */ +#define E1000_NVM_CFG_DONE_PORT_3 0x200000 /* ...for fourth port */ + +#define NVM_82580_LAN_FUNC_OFFSET(a) ((a) ? (0x40 + (0x40 * (a))) : 0) + +/* Mask bits for fields in Word 0x24 of the NVM */ +#define NVM_WORD24_COM_MDIO 0x0008 /* MDIO interface shared */ +#define NVM_WORD24_EXT_MDIO 0x0004 /* MDIO accesses routed extrnl */ +/* Offset of Link Mode bits for 82575/82576 */ +#define NVM_WORD24_LNK_MODE_OFFSET 8 +/* Offset of Link Mode bits for 82580 up */ +#define NVM_WORD24_82580_LNK_MODE_OFFSET 4 + + +/* Mask bits for fields in Word 0x0f of the NVM */ +#define NVM_WORD0F_PAUSE_MASK 0x3000 +#define NVM_WORD0F_PAUSE 0x1000 +#define NVM_WORD0F_ASM_DIR 0x2000 + +/* Mask bits for fields in Word 0x1a of the NVM */ +#define NVM_WORD1A_ASPM_MASK 0x000C + +/* Mask bits for fields in Word 0x03 of the EEPROM */ +#define NVM_COMPAT_LOM 0x0800 + +/* length of string needed to store PBA number */ +#define E1000_PBANUM_LENGTH 11 + +/* For checksumming, the sum of all words in the NVM should equal 0xBABA. */ +#define NVM_SUM 0xBABA + +/* PBA (printed board assembly) number words */ +#define NVM_PBA_OFFSET_0 8 +#define NVM_PBA_OFFSET_1 9 +#define NVM_PBA_PTR_GUARD 0xFAFA +#define NVM_RESERVED_WORD 0xFFFF +#define NVM_WORD_SIZE_BASE_SHIFT 6 + +/* NVM Commands - SPI */ +#define NVM_MAX_RETRY_SPI 5000 /* Max wait of 5ms, for RDY signal */ +#define NVM_READ_OPCODE_SPI 0x03 /* NVM read opcode */ +#define NVM_WRITE_OPCODE_SPI 0x02 /* NVM write opcode */ +#define NVM_A8_OPCODE_SPI 0x08 /* opcode bit-3 = address bit-8 */ +#define NVM_WREN_OPCODE_SPI 0x06 /* NVM set Write Enable latch */ +#define NVM_RDSR_OPCODE_SPI 0x05 /* NVM read Status register */ + +/* SPI NVM Status Register */ +#define NVM_STATUS_RDY_SPI 0x01 + +/* Word definitions for ID LED Settings */ +#define ID_LED_RESERVED_0000 0x0000 +#define ID_LED_RESERVED_FFFF 0xFFFF +#define ID_LED_DEFAULT ((ID_LED_OFF1_ON2 << 12) | \ + (ID_LED_OFF1_OFF2 << 8) | \ + (ID_LED_DEF1_DEF2 << 4) | \ + (ID_LED_DEF1_DEF2)) +#define ID_LED_DEF1_DEF2 0x1 +#define ID_LED_DEF1_ON2 0x2 +#define ID_LED_DEF1_OFF2 0x3 +#define ID_LED_ON1_DEF2 0x4 +#define ID_LED_ON1_ON2 0x5 +#define ID_LED_ON1_OFF2 0x6 +#define ID_LED_OFF1_DEF2 0x7 +#define ID_LED_OFF1_ON2 0x8 +#define ID_LED_OFF1_OFF2 0x9 + +#define IGP_ACTIVITY_LED_MASK 0xFFFFF0FF +#define IGP_ACTIVITY_LED_ENABLE 0x0300 +#define IGP_LED3_MODE 0x07000000 + +/* PCI/PCI-X/PCI-EX Config space */ +#define PCI_HEADER_TYPE_REGISTER 0x0E +#define PCIE_LINK_STATUS 0x12 +#define PCIE_DEVICE_CONTROL2 0x28 + +#define PCI_HEADER_TYPE_MULTIFUNC 0x80 +#define PCIE_LINK_WIDTH_MASK 0x3F0 +#define PCIE_LINK_WIDTH_SHIFT 4 +#define PCIE_LINK_SPEED_MASK 0x0F +#define PCIE_LINK_SPEED_2500 0x01 +#define PCIE_LINK_SPEED_5000 0x02 +#define PCIE_DEVICE_CONTROL2_16ms 0x0005 + +#ifndef ETH_ADDR_LEN +#define ETH_ADDR_LEN 6 +#endif + +#define PHY_REVISION_MASK 0xFFFFFFF0 +#define MAX_PHY_REG_ADDRESS 0x1F /* 5 bit address bus (0-0x1F) */ +#define MAX_PHY_MULTI_PAGE_REG 0xF + +/* Bit definitions for valid PHY IDs. + * I = Integrated + * E = External + */ +#define M88E1000_E_PHY_ID 0x01410C50 +#define M88E1000_I_PHY_ID 0x01410C30 +#define M88E1011_I_PHY_ID 0x01410C20 +#define IGP01E1000_I_PHY_ID 0x02A80380 +#define M88E1111_I_PHY_ID 0x01410CC0 +#define M88E1543_E_PHY_ID 0x01410EA0 +#define M88E1112_E_PHY_ID 0x01410C90 +#define I347AT4_E_PHY_ID 0x01410DC0 +#define M88E1340M_E_PHY_ID 0x01410DF0 +#define GG82563_E_PHY_ID 0x01410CA0 +#define IGP03E1000_E_PHY_ID 0x02A80390 +#define IFE_E_PHY_ID 0x02A80330 +#define IFE_PLUS_E_PHY_ID 0x02A80320 +#define IFE_C_E_PHY_ID 0x02A80310 +#define I82580_I_PHY_ID 0x015403A0 +#define I350_I_PHY_ID 0x015403B0 +#define I210_I_PHY_ID 0x01410C00 +#define IGP04E1000_E_PHY_ID 0x02A80391 +#define M88_VENDOR 0x0141 + +/* M88E1000 Specific Registers */ +#define M88E1000_PHY_SPEC_CTRL 0x10 /* PHY Specific Control Reg */ +#define M88E1000_PHY_SPEC_STATUS 0x11 /* PHY Specific Status Reg */ +#define M88E1000_EXT_PHY_SPEC_CTRL 0x14 /* Extended PHY Specific Cntrl */ +#define M88E1000_RX_ERR_CNTR 0x15 /* Receive Error Counter */ + +#define M88E1000_PHY_PAGE_SELECT 0x1D /* Reg 29 for pg number setting */ +#define M88E1000_PHY_GEN_CONTROL 0x1E /* meaning depends on reg 29 */ + +/* M88E1000 PHY Specific Control Register */ +#define M88E1000_PSCR_POLARITY_REVERSAL 0x0002 /* 1=Polarity Reverse enabled */ +/* MDI Crossover Mode bits 6:5 Manual MDI configuration */ +#define M88E1000_PSCR_MDI_MANUAL_MODE 0x0000 +#define M88E1000_PSCR_MDIX_MANUAL_MODE 0x0020 /* Manual MDIX configuration */ +/* 1000BASE-T: Auto crossover, 100BASE-TX/10BASE-T: MDI Mode */ +#define M88E1000_PSCR_AUTO_X_1000T 0x0040 +/* Auto crossover enabled all speeds */ +#define M88E1000_PSCR_AUTO_X_MODE 0x0060 +#define M88E1000_PSCR_ASSERT_CRS_ON_TX 0x0800 /* 1=Assert CRS on Tx */ + +/* M88E1000 PHY Specific Status Register */ +#define M88E1000_PSSR_REV_POLARITY 0x0002 /* 1=Polarity reversed */ +#define M88E1000_PSSR_DOWNSHIFT 0x0020 /* 1=Downshifted */ +#define M88E1000_PSSR_MDIX 0x0040 /* 1=MDIX; 0=MDI */ +/* 0 = <50M + * 1 = 50-80M + * 2 = 80-110M + * 3 = 110-140M + * 4 = >140M + */ +#define M88E1000_PSSR_CABLE_LENGTH 0x0380 +#define M88E1000_PSSR_LINK 0x0400 /* 1=Link up, 0=Link down */ +#define M88E1000_PSSR_SPD_DPLX_RESOLVED 0x0800 /* 1=Speed & Duplex resolved */ +#define M88E1000_PSSR_SPEED 0xC000 /* Speed, bits 14:15 */ +#define M88E1000_PSSR_1000MBS 0x8000 /* 10=1000Mbs */ + +#define M88E1000_PSSR_CABLE_LENGTH_SHIFT 7 + +/* Number of times we will attempt to autonegotiate before downshifting if we + * are the master + */ +#define M88E1000_EPSCR_MASTER_DOWNSHIFT_MASK 0x0C00 +#define M88E1000_EPSCR_MASTER_DOWNSHIFT_1X 0x0000 +/* Number of times we will attempt to autonegotiate before downshifting if we + * are the slave + */ +#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_MASK 0x0300 +#define M88E1000_EPSCR_SLAVE_DOWNSHIFT_1X 0x0100 +#define M88E1000_EPSCR_TX_CLK_25 0x0070 /* 25 MHz TX_CLK */ + +/* Intel I347AT4 Registers */ +#define I347AT4_PCDL 0x10 /* PHY Cable Diagnostics Length */ +#define I347AT4_PCDC 0x15 /* PHY Cable Diagnostics Control */ +#define I347AT4_PAGE_SELECT 0x16 + +/* I347AT4 Extended PHY Specific Control Register */ + +/* Number of times we will attempt to autonegotiate before downshifting if we + * are the master + */ +#define I347AT4_PSCR_DOWNSHIFT_ENABLE 0x0800 +#define I347AT4_PSCR_DOWNSHIFT_MASK 0x7000 +#define I347AT4_PSCR_DOWNSHIFT_1X 0x0000 +#define I347AT4_PSCR_DOWNSHIFT_2X 0x1000 +#define I347AT4_PSCR_DOWNSHIFT_3X 0x2000 +#define I347AT4_PSCR_DOWNSHIFT_4X 0x3000 +#define I347AT4_PSCR_DOWNSHIFT_5X 0x4000 +#define I347AT4_PSCR_DOWNSHIFT_6X 0x5000 +#define I347AT4_PSCR_DOWNSHIFT_7X 0x6000 +#define I347AT4_PSCR_DOWNSHIFT_8X 0x7000 + +/* I347AT4 PHY Cable Diagnostics Control */ +#define I347AT4_PCDC_CABLE_LENGTH_UNIT 0x0400 /* 0=cm 1=meters */ + +/* M88E1112 only registers */ +#define M88E1112_VCT_DSP_DISTANCE 0x001A + +/* M88EC018 Rev 2 specific DownShift settings */ +#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_MASK 0x0E00 +#define M88EC018_EPSCR_DOWNSHIFT_COUNTER_5X 0x0800 + +/* Bits... + * 15-5: page + * 4-0: register offset + */ +#define GG82563_PAGE_SHIFT 5 +#define GG82563_REG(page, reg) \ + (((page) << GG82563_PAGE_SHIFT) | ((reg) & MAX_PHY_REG_ADDRESS)) +#define GG82563_MIN_ALT_REG 30 + +/* GG82563 Specific Registers */ +#define GG82563_PHY_SPEC_CTRL GG82563_REG(0, 16) /* PHY Spec Cntrl */ +#define GG82563_PHY_PAGE_SELECT GG82563_REG(0, 22) /* Page Select */ +#define GG82563_PHY_SPEC_CTRL_2 GG82563_REG(0, 26) /* PHY Spec Cntrl2 */ +#define GG82563_PHY_PAGE_SELECT_ALT GG82563_REG(0, 29) /* Alt Page Select */ + +/* MAC Specific Control Register */ +#define GG82563_PHY_MAC_SPEC_CTRL GG82563_REG(2, 21) + +#define GG82563_PHY_DSP_DISTANCE GG82563_REG(5, 26) /* DSP Distance */ + +/* Page 193 - Port Control Registers */ +/* Kumeran Mode Control */ +#define GG82563_PHY_KMRN_MODE_CTRL GG82563_REG(193, 16) +#define GG82563_PHY_PWR_MGMT_CTRL GG82563_REG(193, 20) /* Pwr Mgt Ctrl */ + +/* Page 194 - KMRN Registers */ +#define GG82563_PHY_INBAND_CTRL GG82563_REG(194, 18) /* Inband Ctrl */ + +/* MDI Control */ +#define E1000_MDIC_REG_MASK 0x001F0000 +#define E1000_MDIC_REG_SHIFT 16 +#define E1000_MDIC_PHY_MASK 0x03E00000 +#define E1000_MDIC_PHY_SHIFT 21 +#define E1000_MDIC_OP_WRITE 0x04000000 +#define E1000_MDIC_OP_READ 0x08000000 +#define E1000_MDIC_READY 0x10000000 +#define E1000_MDIC_ERROR 0x40000000 +#define E1000_MDIC_DEST 0x80000000 + +/* SerDes Control */ +#define E1000_GEN_CTL_READY 0x80000000 +#define E1000_GEN_CTL_ADDRESS_SHIFT 8 +#define E1000_GEN_POLL_TIMEOUT 640 + +/* LinkSec register fields */ +#define E1000_LSECTXCAP_SUM_MASK 0x00FF0000 +#define E1000_LSECTXCAP_SUM_SHIFT 16 +#define E1000_LSECRXCAP_SUM_MASK 0x00FF0000 +#define E1000_LSECRXCAP_SUM_SHIFT 16 + +#define E1000_LSECTXCTRL_EN_MASK 0x00000003 +#define E1000_LSECTXCTRL_DISABLE 0x0 +#define E1000_LSECTXCTRL_AUTH 0x1 +#define E1000_LSECTXCTRL_AUTH_ENCRYPT 0x2 +#define E1000_LSECTXCTRL_AISCI 0x00000020 +#define E1000_LSECTXCTRL_PNTHRSH_MASK 0xFFFFFF00 +#define E1000_LSECTXCTRL_RSV_MASK 0x000000D8 + +#define E1000_LSECRXCTRL_EN_MASK 0x0000000C +#define E1000_LSECRXCTRL_EN_SHIFT 2 +#define E1000_LSECRXCTRL_DISABLE 0x0 +#define E1000_LSECRXCTRL_CHECK 0x1 +#define E1000_LSECRXCTRL_STRICT 0x2 +#define E1000_LSECRXCTRL_DROP 0x3 +#define E1000_LSECRXCTRL_PLSH 0x00000040 +#define E1000_LSECRXCTRL_RP 0x00000080 +#define E1000_LSECRXCTRL_RSV_MASK 0xFFFFFF33 + +/* Tx Rate-Scheduler Config fields */ +#define E1000_RTTBCNRC_RS_ENA 0x80000000 +#define E1000_RTTBCNRC_RF_DEC_MASK 0x00003FFF +#define E1000_RTTBCNRC_RF_INT_SHIFT 14 +#define E1000_RTTBCNRC_RF_INT_MASK \ + (E1000_RTTBCNRC_RF_DEC_MASK << E1000_RTTBCNRC_RF_INT_SHIFT) + +/* DMA Coalescing register fields */ +/* DMA Coalescing Watchdog Timer */ +#define E1000_DMACR_DMACWT_MASK 0x00003FFF +/* DMA Coalescing Rx Threshold */ +#define E1000_DMACR_DMACTHR_MASK 0x00FF0000 +#define E1000_DMACR_DMACTHR_SHIFT 16 +/* Lx when no PCIe transactions */ +#define E1000_DMACR_DMAC_LX_MASK 0x30000000 +#define E1000_DMACR_DMAC_LX_SHIFT 28 +#define E1000_DMACR_DMAC_EN 0x80000000 /* Enable DMA Coalescing */ +/* DMA Coalescing BMC-to-OS Watchdog Enable */ +#define E1000_DMACR_DC_BMC2OSW_EN 0x00008000 + +/* DMA Coalescing Transmit Threshold */ +#define E1000_DMCTXTH_DMCTTHR_MASK 0x00000FFF + +#define E1000_DMCTLX_TTLX_MASK 0x00000FFF /* Time to LX request */ + +/* Rx Traffic Rate Threshold */ +#define E1000_DMCRTRH_UTRESH_MASK 0x0007FFFF +/* Rx packet rate in current window */ +#define E1000_DMCRTRH_LRPRCW 0x80000000 + +/* DMA Coal Rx Traffic Current Count */ +#define E1000_DMCCNT_CCOUNT_MASK 0x01FFFFFF + +/* Flow ctrl Rx Threshold High val */ +#define E1000_FCRTC_RTH_COAL_MASK 0x0003FFF0 +#define E1000_FCRTC_RTH_COAL_SHIFT 4 +/* Lx power decision based on DMA coal */ +#define E1000_PCIEMISC_LX_DECISION 0x00000080 + +#define E1000_RXPBS_CFG_TS_EN 0x80000000 /* Timestamp in Rx buffer */ +#define E1000_RXPBS_SIZE_I210_MASK 0x0000003F /* Rx packet buffer size */ +#define E1000_TXPB0S_SIZE_I210_MASK 0x0000003F /* Tx packet buffer 0 size */ + +/* Proxy Filter Control */ +#define E1000_PROXYFC_D0 0x00000001 /* Enable offload in D0 */ +#define E1000_PROXYFC_EX 0x00000004 /* Directed exact proxy */ +#define E1000_PROXYFC_MC 0x00000008 /* Directed MC Proxy */ +#define E1000_PROXYFC_BC 0x00000010 /* Broadcast Proxy Enable */ +#define E1000_PROXYFC_ARP_DIRECTED 0x00000020 /* Directed ARP Proxy Ena */ +#define E1000_PROXYFC_IPV4 0x00000040 /* Directed IPv4 Enable */ +#define E1000_PROXYFC_IPV6 0x00000080 /* Directed IPv6 Enable */ +#define E1000_PROXYFC_NS 0x00000200 /* IPv6 Neighbor Solicitation */ +#define E1000_PROXYFC_ARP 0x00000800 /* ARP Request Proxy Ena */ +/* Proxy Status */ +#define E1000_PROXYS_CLEAR 0xFFFFFFFF /* Clear */ + +/* Firmware Status */ +#define E1000_FWSTS_FWRI 0x80000000 /* FW Reset Indication */ +/* VF Control */ +#define E1000_VTCTRL_RST 0x04000000 /* Reset VF */ + +#define E1000_STATUS_LAN_ID_MASK 0x00000000C /* Mask for Lan ID field */ +/* Lan ID bit field offset in status register */ +#define E1000_STATUS_LAN_ID_OFFSET 2 +#define E1000_VFTA_ENTRIES 128 +#ifndef E1000_UNUSEDARG +#define E1000_UNUSEDARG +#endif /* E1000_UNUSEDARG */ +#endif /* _E1000_DEFINES_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h new file mode 100644 index 00000000..347cef71 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_hw.h @@ -0,0 +1,793 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _E1000_HW_H_ +#define _E1000_HW_H_ + +#include "e1000_osdep.h" +#include "e1000_regs.h" +#include "e1000_defines.h" + +struct e1000_hw; + +#define E1000_DEV_ID_82576 0x10C9 +#define E1000_DEV_ID_82576_FIBER 0x10E6 +#define E1000_DEV_ID_82576_SERDES 0x10E7 +#define E1000_DEV_ID_82576_QUAD_COPPER 0x10E8 +#define E1000_DEV_ID_82576_QUAD_COPPER_ET2 0x1526 +#define E1000_DEV_ID_82576_NS 0x150A +#define E1000_DEV_ID_82576_NS_SERDES 0x1518 +#define E1000_DEV_ID_82576_SERDES_QUAD 0x150D +#define E1000_DEV_ID_82575EB_COPPER 0x10A7 +#define E1000_DEV_ID_82575EB_FIBER_SERDES 0x10A9 +#define E1000_DEV_ID_82575GB_QUAD_COPPER 0x10D6 +#define E1000_DEV_ID_82580_COPPER 0x150E +#define E1000_DEV_ID_82580_FIBER 0x150F +#define E1000_DEV_ID_82580_SERDES 0x1510 +#define E1000_DEV_ID_82580_SGMII 0x1511 +#define E1000_DEV_ID_82580_COPPER_DUAL 0x1516 +#define E1000_DEV_ID_82580_QUAD_FIBER 0x1527 +#define E1000_DEV_ID_I350_COPPER 0x1521 +#define E1000_DEV_ID_I350_FIBER 0x1522 +#define E1000_DEV_ID_I350_SERDES 0x1523 +#define E1000_DEV_ID_I350_SGMII 0x1524 +#define E1000_DEV_ID_I350_DA4 0x1546 +#define E1000_DEV_ID_I210_COPPER 0x1533 +#define E1000_DEV_ID_I210_COPPER_OEM1 0x1534 +#define E1000_DEV_ID_I210_COPPER_IT 0x1535 +#define E1000_DEV_ID_I210_FIBER 0x1536 +#define E1000_DEV_ID_I210_SERDES 0x1537 +#define E1000_DEV_ID_I210_SGMII 0x1538 +#define E1000_DEV_ID_I210_COPPER_FLASHLESS 0x157B +#define E1000_DEV_ID_I210_SERDES_FLASHLESS 0x157C +#define E1000_DEV_ID_I211_COPPER 0x1539 +#define E1000_DEV_ID_I354_BACKPLANE_1GBPS 0x1F40 +#define E1000_DEV_ID_I354_SGMII 0x1F41 +#define E1000_DEV_ID_I354_BACKPLANE_2_5GBPS 0x1F45 +#define E1000_DEV_ID_DH89XXCC_SGMII 0x0438 +#define E1000_DEV_ID_DH89XXCC_SERDES 0x043A +#define E1000_DEV_ID_DH89XXCC_BACKPLANE 0x043C +#define E1000_DEV_ID_DH89XXCC_SFP 0x0440 + +#define E1000_REVISION_0 0 +#define E1000_REVISION_1 1 +#define E1000_REVISION_2 2 +#define E1000_REVISION_3 3 +#define E1000_REVISION_4 4 + +#define E1000_FUNC_0 0 +#define E1000_FUNC_1 1 +#define E1000_FUNC_2 2 +#define E1000_FUNC_3 3 + +#define E1000_ALT_MAC_ADDRESS_OFFSET_LAN0 0 +#define E1000_ALT_MAC_ADDRESS_OFFSET_LAN1 3 +#define E1000_ALT_MAC_ADDRESS_OFFSET_LAN2 6 +#define E1000_ALT_MAC_ADDRESS_OFFSET_LAN3 9 + +enum e1000_mac_type { + e1000_undefined = 0, + e1000_82575, + e1000_82576, + e1000_82580, + e1000_i350, + e1000_i354, + e1000_i210, + e1000_i211, + e1000_num_macs /* List is 1-based, so subtract 1 for true count. */ +}; + +enum e1000_media_type { + e1000_media_type_unknown = 0, + e1000_media_type_copper = 1, + e1000_media_type_fiber = 2, + e1000_media_type_internal_serdes = 3, + e1000_num_media_types +}; + +enum e1000_nvm_type { + e1000_nvm_unknown = 0, + e1000_nvm_none, + e1000_nvm_eeprom_spi, + e1000_nvm_flash_hw, + e1000_nvm_invm, + e1000_nvm_flash_sw +}; + +enum e1000_nvm_override { + e1000_nvm_override_none = 0, + e1000_nvm_override_spi_small, + e1000_nvm_override_spi_large, +}; + +enum e1000_phy_type { + e1000_phy_unknown = 0, + e1000_phy_none, + e1000_phy_m88, + e1000_phy_igp, + e1000_phy_igp_2, + e1000_phy_gg82563, + e1000_phy_igp_3, + e1000_phy_ife, + e1000_phy_82580, + e1000_phy_vf, + e1000_phy_i210, +}; + +enum e1000_bus_type { + e1000_bus_type_unknown = 0, + e1000_bus_type_pci, + e1000_bus_type_pcix, + e1000_bus_type_pci_express, + e1000_bus_type_reserved +}; + +enum e1000_bus_speed { + e1000_bus_speed_unknown = 0, + e1000_bus_speed_33, + e1000_bus_speed_66, + e1000_bus_speed_100, + e1000_bus_speed_120, + e1000_bus_speed_133, + e1000_bus_speed_2500, + e1000_bus_speed_5000, + e1000_bus_speed_reserved +}; + +enum e1000_bus_width { + e1000_bus_width_unknown = 0, + e1000_bus_width_pcie_x1, + e1000_bus_width_pcie_x2, + e1000_bus_width_pcie_x4 = 4, + e1000_bus_width_pcie_x8 = 8, + e1000_bus_width_32, + e1000_bus_width_64, + e1000_bus_width_reserved +}; + +enum e1000_1000t_rx_status { + e1000_1000t_rx_status_not_ok = 0, + e1000_1000t_rx_status_ok, + e1000_1000t_rx_status_undefined = 0xFF +}; + +enum e1000_rev_polarity { + e1000_rev_polarity_normal = 0, + e1000_rev_polarity_reversed, + e1000_rev_polarity_undefined = 0xFF +}; + +enum e1000_fc_mode { + e1000_fc_none = 0, + e1000_fc_rx_pause, + e1000_fc_tx_pause, + e1000_fc_full, + e1000_fc_default = 0xFF +}; + +enum e1000_ms_type { + e1000_ms_hw_default = 0, + e1000_ms_force_master, + e1000_ms_force_slave, + e1000_ms_auto +}; + +enum e1000_smart_speed { + e1000_smart_speed_default = 0, + e1000_smart_speed_on, + e1000_smart_speed_off +}; + +enum e1000_serdes_link_state { + e1000_serdes_link_down = 0, + e1000_serdes_link_autoneg_progress, + e1000_serdes_link_autoneg_complete, + e1000_serdes_link_forced_up +}; + +#ifndef __le16 +#define __le16 u16 +#endif +#ifndef __le32 +#define __le32 u32 +#endif +#ifndef __le64 +#define __le64 u64 +#endif +/* Receive Descriptor */ +struct e1000_rx_desc { + __le64 buffer_addr; /* Address of the descriptor's data buffer */ + __le16 length; /* Length of data DMAed into data buffer */ + __le16 csum; /* Packet checksum */ + u8 status; /* Descriptor status */ + u8 errors; /* Descriptor Errors */ + __le16 special; +}; + +/* Receive Descriptor - Extended */ +union e1000_rx_desc_extended { + struct { + __le64 buffer_addr; + __le64 reserved; + } read; + struct { + struct { + __le32 mrq; /* Multiple Rx Queues */ + union { + __le32 rss; /* RSS Hash */ + struct { + __le16 ip_id; /* IP id */ + __le16 csum; /* Packet Checksum */ + } csum_ip; + } hi_dword; + } lower; + struct { + __le32 status_error; /* ext status/error */ + __le16 length; + __le16 vlan; /* VLAN tag */ + } upper; + } wb; /* writeback */ +}; + +#define MAX_PS_BUFFERS 4 + +/* Number of packet split data buffers (not including the header buffer) */ +#define PS_PAGE_BUFFERS (MAX_PS_BUFFERS - 1) + +/* Receive Descriptor - Packet Split */ +union e1000_rx_desc_packet_split { + struct { + /* one buffer for protocol header(s), three data buffers */ + __le64 buffer_addr[MAX_PS_BUFFERS]; + } read; + struct { + struct { + __le32 mrq; /* Multiple Rx Queues */ + union { + __le32 rss; /* RSS Hash */ + struct { + __le16 ip_id; /* IP id */ + __le16 csum; /* Packet Checksum */ + } csum_ip; + } hi_dword; + } lower; + struct { + __le32 status_error; /* ext status/error */ + __le16 length0; /* length of buffer 0 */ + __le16 vlan; /* VLAN tag */ + } middle; + struct { + __le16 header_status; + /* length of buffers 1-3 */ + __le16 length[PS_PAGE_BUFFERS]; + } upper; + __le64 reserved; + } wb; /* writeback */ +}; + +/* Transmit Descriptor */ +struct e1000_tx_desc { + __le64 buffer_addr; /* Address of the descriptor's data buffer */ + union { + __le32 data; + struct { + __le16 length; /* Data buffer length */ + u8 cso; /* Checksum offset */ + u8 cmd; /* Descriptor control */ + } flags; + } lower; + union { + __le32 data; + struct { + u8 status; /* Descriptor status */ + u8 css; /* Checksum start */ + __le16 special; + } fields; + } upper; +}; + +/* Offload Context Descriptor */ +struct e1000_context_desc { + union { + __le32 ip_config; + struct { + u8 ipcss; /* IP checksum start */ + u8 ipcso; /* IP checksum offset */ + __le16 ipcse; /* IP checksum end */ + } ip_fields; + } lower_setup; + union { + __le32 tcp_config; + struct { + u8 tucss; /* TCP checksum start */ + u8 tucso; /* TCP checksum offset */ + __le16 tucse; /* TCP checksum end */ + } tcp_fields; + } upper_setup; + __le32 cmd_and_length; + union { + __le32 data; + struct { + u8 status; /* Descriptor status */ + u8 hdr_len; /* Header length */ + __le16 mss; /* Maximum segment size */ + } fields; + } tcp_seg_setup; +}; + +/* Offload data descriptor */ +struct e1000_data_desc { + __le64 buffer_addr; /* Address of the descriptor's buffer address */ + union { + __le32 data; + struct { + __le16 length; /* Data buffer length */ + u8 typ_len_ext; + u8 cmd; + } flags; + } lower; + union { + __le32 data; + struct { + u8 status; /* Descriptor status */ + u8 popts; /* Packet Options */ + __le16 special; + } fields; + } upper; +}; + +/* Statistics counters collected by the MAC */ +struct e1000_hw_stats { + u64 crcerrs; + u64 algnerrc; + u64 symerrs; + u64 rxerrc; + u64 mpc; + u64 scc; + u64 ecol; + u64 mcc; + u64 latecol; + u64 colc; + u64 dc; + u64 tncrs; + u64 sec; + u64 cexterr; + u64 rlec; + u64 xonrxc; + u64 xontxc; + u64 xoffrxc; + u64 xofftxc; + u64 fcruc; + u64 prc64; + u64 prc127; + u64 prc255; + u64 prc511; + u64 prc1023; + u64 prc1522; + u64 gprc; + u64 bprc; + u64 mprc; + u64 gptc; + u64 gorc; + u64 gotc; + u64 rnbc; + u64 ruc; + u64 rfc; + u64 roc; + u64 rjc; + u64 mgprc; + u64 mgpdc; + u64 mgptc; + u64 tor; + u64 tot; + u64 tpr; + u64 tpt; + u64 ptc64; + u64 ptc127; + u64 ptc255; + u64 ptc511; + u64 ptc1023; + u64 ptc1522; + u64 mptc; + u64 bptc; + u64 tsctc; + u64 tsctfc; + u64 iac; + u64 icrxptc; + u64 icrxatc; + u64 ictxptc; + u64 ictxatc; + u64 ictxqec; + u64 ictxqmtc; + u64 icrxdmtc; + u64 icrxoc; + u64 cbtmpc; + u64 htdpmc; + u64 cbrdpc; + u64 cbrmpc; + u64 rpthc; + u64 hgptc; + u64 htcbdpc; + u64 hgorc; + u64 hgotc; + u64 lenerrs; + u64 scvpc; + u64 hrmpc; + u64 doosync; + u64 o2bgptc; + u64 o2bspc; + u64 b2ospc; + u64 b2ogprc; +}; + + +struct e1000_phy_stats { + u32 idle_errors; + u32 receive_errors; +}; + +struct e1000_host_mng_dhcp_cookie { + u32 signature; + u8 status; + u8 reserved0; + u16 vlan_id; + u32 reserved1; + u16 reserved2; + u8 reserved3; + u8 checksum; +}; + +/* Host Interface "Rev 1" */ +struct e1000_host_command_header { + u8 command_id; + u8 command_length; + u8 command_options; + u8 checksum; +}; + +#define E1000_HI_MAX_DATA_LENGTH 252 +struct e1000_host_command_info { + struct e1000_host_command_header command_header; + u8 command_data[E1000_HI_MAX_DATA_LENGTH]; +}; + +/* Host Interface "Rev 2" */ +struct e1000_host_mng_command_header { + u8 command_id; + u8 checksum; + u16 reserved1; + u16 reserved2; + u16 command_length; +}; + +#define E1000_HI_MAX_MNG_DATA_LENGTH 0x6F8 +struct e1000_host_mng_command_info { + struct e1000_host_mng_command_header command_header; + u8 command_data[E1000_HI_MAX_MNG_DATA_LENGTH]; +}; + +#include "e1000_mac.h" +#include "e1000_phy.h" +#include "e1000_nvm.h" +#include "e1000_manage.h" +#include "e1000_mbx.h" + +/* Function pointers for the MAC. */ +struct e1000_mac_operations { + s32 (*init_params)(struct e1000_hw *); + s32 (*id_led_init)(struct e1000_hw *); + s32 (*blink_led)(struct e1000_hw *); + bool (*check_mng_mode)(struct e1000_hw *); + s32 (*check_for_link)(struct e1000_hw *); + s32 (*cleanup_led)(struct e1000_hw *); + void (*clear_hw_cntrs)(struct e1000_hw *); + void (*clear_vfta)(struct e1000_hw *); + s32 (*get_bus_info)(struct e1000_hw *); + void (*set_lan_id)(struct e1000_hw *); + s32 (*get_link_up_info)(struct e1000_hw *, u16 *, u16 *); + s32 (*led_on)(struct e1000_hw *); + s32 (*led_off)(struct e1000_hw *); + void (*update_mc_addr_list)(struct e1000_hw *, u8 *, u32); + s32 (*reset_hw)(struct e1000_hw *); + s32 (*init_hw)(struct e1000_hw *); + void (*shutdown_serdes)(struct e1000_hw *); + void (*power_up_serdes)(struct e1000_hw *); + s32 (*setup_link)(struct e1000_hw *); + s32 (*setup_physical_interface)(struct e1000_hw *); + s32 (*setup_led)(struct e1000_hw *); + void (*write_vfta)(struct e1000_hw *, u32, u32); + void (*config_collision_dist)(struct e1000_hw *); + void (*rar_set)(struct e1000_hw *, u8*, u32); + s32 (*read_mac_addr)(struct e1000_hw *); + s32 (*validate_mdi_setting)(struct e1000_hw *); + s32 (*get_thermal_sensor_data)(struct e1000_hw *); + s32 (*init_thermal_sensor_thresh)(struct e1000_hw *); + s32 (*acquire_swfw_sync)(struct e1000_hw *, u16); + void (*release_swfw_sync)(struct e1000_hw *, u16); +}; + +/* When to use various PHY register access functions: + * + * Func Caller + * Function Does Does When to use + * ~~~~~~~~~~~~ ~~~~~ ~~~~~~ ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * X_reg L,P,A n/a for simple PHY reg accesses + * X_reg_locked P,A L for multiple accesses of different regs + * on different pages + * X_reg_page A L,P for multiple accesses of different regs + * on the same page + * + * Where X=[read|write], L=locking, P=sets page, A=register access + * + */ +struct e1000_phy_operations { + s32 (*init_params)(struct e1000_hw *); + s32 (*acquire)(struct e1000_hw *); + s32 (*check_polarity)(struct e1000_hw *); + s32 (*check_reset_block)(struct e1000_hw *); + s32 (*commit)(struct e1000_hw *); + s32 (*force_speed_duplex)(struct e1000_hw *); + s32 (*get_cfg_done)(struct e1000_hw *hw); + s32 (*get_cable_length)(struct e1000_hw *); + s32 (*get_info)(struct e1000_hw *); + s32 (*set_page)(struct e1000_hw *, u16); + s32 (*read_reg)(struct e1000_hw *, u32, u16 *); + s32 (*read_reg_locked)(struct e1000_hw *, u32, u16 *); + s32 (*read_reg_page)(struct e1000_hw *, u32, u16 *); + void (*release)(struct e1000_hw *); + s32 (*reset)(struct e1000_hw *); + s32 (*set_d0_lplu_state)(struct e1000_hw *, bool); + s32 (*set_d3_lplu_state)(struct e1000_hw *, bool); + s32 (*write_reg)(struct e1000_hw *, u32, u16); + s32 (*write_reg_locked)(struct e1000_hw *, u32, u16); + s32 (*write_reg_page)(struct e1000_hw *, u32, u16); + void (*power_up)(struct e1000_hw *); + void (*power_down)(struct e1000_hw *); + s32 (*read_i2c_byte)(struct e1000_hw *, u8, u8, u8 *); + s32 (*write_i2c_byte)(struct e1000_hw *, u8, u8, u8); +}; + +/* Function pointers for the NVM. */ +struct e1000_nvm_operations { + s32 (*init_params)(struct e1000_hw *); + s32 (*acquire)(struct e1000_hw *); + s32 (*read)(struct e1000_hw *, u16, u16, u16 *); + void (*release)(struct e1000_hw *); + void (*reload)(struct e1000_hw *); + s32 (*update)(struct e1000_hw *); + s32 (*valid_led_default)(struct e1000_hw *, u16 *); + s32 (*validate)(struct e1000_hw *); + s32 (*write)(struct e1000_hw *, u16, u16, u16 *); +}; + +#define E1000_MAX_SENSORS 3 + +struct e1000_thermal_diode_data { + u8 location; + u8 temp; + u8 caution_thresh; + u8 max_op_thresh; +}; + +struct e1000_thermal_sensor_data { + struct e1000_thermal_diode_data sensor[E1000_MAX_SENSORS]; +}; + +struct e1000_mac_info { + struct e1000_mac_operations ops; + u8 addr[ETH_ADDR_LEN]; + u8 perm_addr[ETH_ADDR_LEN]; + + enum e1000_mac_type type; + + u32 collision_delta; + u32 ledctl_default; + u32 ledctl_mode1; + u32 ledctl_mode2; + u32 mc_filter_type; + u32 tx_packet_delta; + u32 txcw; + + u16 current_ifs_val; + u16 ifs_max_val; + u16 ifs_min_val; + u16 ifs_ratio; + u16 ifs_step_size; + u16 mta_reg_count; + u16 uta_reg_count; + + /* Maximum size of the MTA register table in all supported adapters */ + #define MAX_MTA_REG 128 + u32 mta_shadow[MAX_MTA_REG]; + u16 rar_entry_count; + + u8 forced_speed_duplex; + + bool adaptive_ifs; + bool has_fwsm; + bool arc_subsystem_valid; + bool asf_firmware_present; + bool autoneg; + bool autoneg_failed; + bool get_link_status; + bool in_ifs_mode; + enum e1000_serdes_link_state serdes_link_state; + bool serdes_has_link; + bool tx_pkt_filtering; + struct e1000_thermal_sensor_data thermal_sensor_data; +}; + +struct e1000_phy_info { + struct e1000_phy_operations ops; + enum e1000_phy_type type; + + enum e1000_1000t_rx_status local_rx; + enum e1000_1000t_rx_status remote_rx; + enum e1000_ms_type ms_type; + enum e1000_ms_type original_ms_type; + enum e1000_rev_polarity cable_polarity; + enum e1000_smart_speed smart_speed; + + u32 addr; + u32 id; + u32 reset_delay_us; /* in usec */ + u32 revision; + + enum e1000_media_type media_type; + + u16 autoneg_advertised; + u16 autoneg_mask; + u16 cable_length; + u16 max_cable_length; + u16 min_cable_length; + + u8 mdix; + + bool disable_polarity_correction; + bool is_mdix; + bool polarity_correction; + bool reset_disable; + bool speed_downgraded; + bool autoneg_wait_to_complete; +}; + +struct e1000_nvm_info { + struct e1000_nvm_operations ops; + enum e1000_nvm_type type; + enum e1000_nvm_override override; + + u32 flash_bank_size; + u32 flash_base_addr; + + u16 word_size; + u16 delay_usec; + u16 address_bits; + u16 opcode_bits; + u16 page_size; +}; + +struct e1000_bus_info { + enum e1000_bus_type type; + enum e1000_bus_speed speed; + enum e1000_bus_width width; + + u16 func; + u16 pci_cmd_word; +}; + +struct e1000_fc_info { + u32 high_water; /* Flow control high-water mark */ + u32 low_water; /* Flow control low-water mark */ + u16 pause_time; /* Flow control pause timer */ + u16 refresh_time; /* Flow control refresh timer */ + bool send_xon; /* Flow control send XON */ + bool strict_ieee; /* Strict IEEE mode */ + enum e1000_fc_mode current_mode; /* FC mode in effect */ + enum e1000_fc_mode requested_mode; /* FC mode requested by caller */ +}; + +struct e1000_mbx_operations { + s32 (*init_params)(struct e1000_hw *hw); + s32 (*read)(struct e1000_hw *, u32 *, u16, u16); + s32 (*write)(struct e1000_hw *, u32 *, u16, u16); + s32 (*read_posted)(struct e1000_hw *, u32 *, u16, u16); + s32 (*write_posted)(struct e1000_hw *, u32 *, u16, u16); + s32 (*check_for_msg)(struct e1000_hw *, u16); + s32 (*check_for_ack)(struct e1000_hw *, u16); + s32 (*check_for_rst)(struct e1000_hw *, u16); +}; + +struct e1000_mbx_stats { + u32 msgs_tx; + u32 msgs_rx; + + u32 acks; + u32 reqs; + u32 rsts; +}; + +struct e1000_mbx_info { + struct e1000_mbx_operations ops; + struct e1000_mbx_stats stats; + u32 timeout; + u32 usec_delay; + u16 size; +}; + +struct e1000_dev_spec_82575 { + bool sgmii_active; + bool global_device_reset; + bool eee_disable; + bool module_plugged; + bool clear_semaphore_once; + u32 mtu; + struct sfp_e1000_flags eth_flags; + u8 media_port; + bool media_changed; +}; + +struct e1000_dev_spec_vf { + u32 vf_number; + u32 v2p_mailbox; +}; + +struct e1000_hw { + void *back; + + u8 __iomem *hw_addr; + u8 __iomem *flash_address; + unsigned long io_base; + + struct e1000_mac_info mac; + struct e1000_fc_info fc; + struct e1000_phy_info phy; + struct e1000_nvm_info nvm; + struct e1000_bus_info bus; + struct e1000_mbx_info mbx; + struct e1000_host_mng_dhcp_cookie mng_cookie; + + union { + struct e1000_dev_spec_82575 _82575; + struct e1000_dev_spec_vf vf; + } dev_spec; + + u16 device_id; + u16 subsystem_vendor_id; + u16 subsystem_device_id; + u16 vendor_id; + + u8 revision_id; +}; + +#include "e1000_82575.h" +#include "e1000_i210.h" + +/* These functions must be implemented by drivers */ +s32 e1000_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value); +s32 e1000_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value); + +#endif diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.c new file mode 100644 index 00000000..1e9f3e6e --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.c @@ -0,0 +1,909 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#include "e1000_api.h" + + +static s32 e1000_acquire_nvm_i210(struct e1000_hw *hw); +static void e1000_release_nvm_i210(struct e1000_hw *hw); +static s32 e1000_get_hw_semaphore_i210(struct e1000_hw *hw); +static s32 e1000_write_nvm_srwr(struct e1000_hw *hw, u16 offset, u16 words, + u16 *data); +static s32 e1000_pool_flash_update_done_i210(struct e1000_hw *hw); +static s32 e1000_valid_led_default_i210(struct e1000_hw *hw, u16 *data); + +/** + * e1000_acquire_nvm_i210 - Request for access to EEPROM + * @hw: pointer to the HW structure + * + * Acquire the necessary semaphores for exclusive access to the EEPROM. + * Set the EEPROM access request bit and wait for EEPROM access grant bit. + * Return successful if access grant bit set, else clear the request for + * EEPROM access and return -E1000_ERR_NVM (-1). + **/ +static s32 e1000_acquire_nvm_i210(struct e1000_hw *hw) +{ + s32 ret_val; + + DEBUGFUNC("e1000_acquire_nvm_i210"); + + ret_val = e1000_acquire_swfw_sync_i210(hw, E1000_SWFW_EEP_SM); + + return ret_val; +} + +/** + * e1000_release_nvm_i210 - Release exclusive access to EEPROM + * @hw: pointer to the HW structure + * + * Stop any current commands to the EEPROM and clear the EEPROM request bit, + * then release the semaphores acquired. + **/ +static void e1000_release_nvm_i210(struct e1000_hw *hw) +{ + DEBUGFUNC("e1000_release_nvm_i210"); + + e1000_release_swfw_sync_i210(hw, E1000_SWFW_EEP_SM); +} + +/** + * e1000_acquire_swfw_sync_i210 - Acquire SW/FW semaphore + * @hw: pointer to the HW structure + * @mask: specifies which semaphore to acquire + * + * Acquire the SW/FW semaphore to access the PHY or NVM. The mask + * will also specify which port we're acquiring the lock for. + **/ +s32 e1000_acquire_swfw_sync_i210(struct e1000_hw *hw, u16 mask) +{ + u32 swfw_sync; + u32 swmask = mask; + u32 fwmask = mask << 16; + s32 ret_val = E1000_SUCCESS; + s32 i = 0, timeout = 200; /* FIXME: find real value to use here */ + + DEBUGFUNC("e1000_acquire_swfw_sync_i210"); + + while (i < timeout) { + if (e1000_get_hw_semaphore_i210(hw)) { + ret_val = -E1000_ERR_SWFW_SYNC; + goto out; + } + + swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); + if (!(swfw_sync & (fwmask | swmask))) + break; + + /* + * Firmware currently using resource (fwmask) + * or other software thread using resource (swmask) + */ + e1000_put_hw_semaphore_generic(hw); + msec_delay_irq(5); + i++; + } + + if (i == timeout) { + DEBUGOUT("Driver can't access resource, SW_FW_SYNC timeout.\n"); + ret_val = -E1000_ERR_SWFW_SYNC; + goto out; + } + + swfw_sync |= swmask; + E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); + + e1000_put_hw_semaphore_generic(hw); + +out: + return ret_val; +} + +/** + * e1000_release_swfw_sync_i210 - Release SW/FW semaphore + * @hw: pointer to the HW structure + * @mask: specifies which semaphore to acquire + * + * Release the SW/FW semaphore used to access the PHY or NVM. The mask + * will also specify which port we're releasing the lock for. + **/ +void e1000_release_swfw_sync_i210(struct e1000_hw *hw, u16 mask) +{ + u32 swfw_sync; + + DEBUGFUNC("e1000_release_swfw_sync_i210"); + + while (e1000_get_hw_semaphore_i210(hw) != E1000_SUCCESS) + ; /* Empty */ + + swfw_sync = E1000_READ_REG(hw, E1000_SW_FW_SYNC); + swfw_sync &= ~mask; + E1000_WRITE_REG(hw, E1000_SW_FW_SYNC, swfw_sync); + + e1000_put_hw_semaphore_generic(hw); +} + +/** + * e1000_get_hw_semaphore_i210 - Acquire hardware semaphore + * @hw: pointer to the HW structure + * + * Acquire the HW semaphore to access the PHY or NVM + **/ +static s32 e1000_get_hw_semaphore_i210(struct e1000_hw *hw) +{ + u32 swsm; + s32 timeout = hw->nvm.word_size + 1; + s32 i = 0; + + DEBUGFUNC("e1000_get_hw_semaphore_i210"); + + /* Get the SW semaphore */ + while (i < timeout) { + swsm = E1000_READ_REG(hw, E1000_SWSM); + if (!(swsm & E1000_SWSM_SMBI)) + break; + + usec_delay(50); + i++; + } + + if (i == timeout) { + /* In rare circumstances, the SW semaphore may already be held + * unintentionally. Clear the semaphore once before giving up. + */ + if (hw->dev_spec._82575.clear_semaphore_once) { + hw->dev_spec._82575.clear_semaphore_once = false; + e1000_put_hw_semaphore_generic(hw); + for (i = 0; i < timeout; i++) { + swsm = E1000_READ_REG(hw, E1000_SWSM); + if (!(swsm & E1000_SWSM_SMBI)) + break; + + usec_delay(50); + } + } + + /* If we do not have the semaphore here, we have to give up. */ + if (i == timeout) { + DEBUGOUT("Driver can't access device - SMBI bit is set.\n"); + return -E1000_ERR_NVM; + } + } + + /* Get the FW semaphore. */ + for (i = 0; i < timeout; i++) { + swsm = E1000_READ_REG(hw, E1000_SWSM); + E1000_WRITE_REG(hw, E1000_SWSM, swsm | E1000_SWSM_SWESMBI); + + /* Semaphore acquired if bit latched */ + if (E1000_READ_REG(hw, E1000_SWSM) & E1000_SWSM_SWESMBI) + break; + + usec_delay(50); + } + + if (i == timeout) { + /* Release semaphores */ + e1000_put_hw_semaphore_generic(hw); + DEBUGOUT("Driver can't access the NVM\n"); + return -E1000_ERR_NVM; + } + + return E1000_SUCCESS; +} + +/** + * e1000_read_nvm_srrd_i210 - Reads Shadow Ram using EERD register + * @hw: pointer to the HW structure + * @offset: offset of word in the Shadow Ram to read + * @words: number of words to read + * @data: word read from the Shadow Ram + * + * Reads a 16 bit word from the Shadow Ram using the EERD register. + * Uses necessary synchronization semaphores. + **/ +s32 e1000_read_nvm_srrd_i210(struct e1000_hw *hw, u16 offset, u16 words, + u16 *data) +{ + s32 status = E1000_SUCCESS; + u16 i, count; + + DEBUGFUNC("e1000_read_nvm_srrd_i210"); + + /* We cannot hold synchronization semaphores for too long, + * because of forceful takeover procedure. However it is more efficient + * to read in bursts than synchronizing access for each word. */ + for (i = 0; i < words; i += E1000_EERD_EEWR_MAX_COUNT) { + count = (words - i) / E1000_EERD_EEWR_MAX_COUNT > 0 ? + E1000_EERD_EEWR_MAX_COUNT : (words - i); + if (hw->nvm.ops.acquire(hw) == E1000_SUCCESS) { + status = e1000_read_nvm_eerd(hw, offset, count, + data + i); + hw->nvm.ops.release(hw); + } else { + status = E1000_ERR_SWFW_SYNC; + } + + if (status != E1000_SUCCESS) + break; + } + + return status; +} + +/** + * e1000_write_nvm_srwr_i210 - Write to Shadow RAM using EEWR + * @hw: pointer to the HW structure + * @offset: offset within the Shadow RAM to be written to + * @words: number of words to write + * @data: 16 bit word(s) to be written to the Shadow RAM + * + * Writes data to Shadow RAM at offset using EEWR register. + * + * If e1000_update_nvm_checksum is not called after this function , the + * data will not be committed to FLASH and also Shadow RAM will most likely + * contain an invalid checksum. + * + * If error code is returned, data and Shadow RAM may be inconsistent - buffer + * partially written. + **/ +s32 e1000_write_nvm_srwr_i210(struct e1000_hw *hw, u16 offset, u16 words, + u16 *data) +{ + s32 status = E1000_SUCCESS; + u16 i, count; + + DEBUGFUNC("e1000_write_nvm_srwr_i210"); + + /* We cannot hold synchronization semaphores for too long, + * because of forceful takeover procedure. However it is more efficient + * to write in bursts than synchronizing access for each word. */ + for (i = 0; i < words; i += E1000_EERD_EEWR_MAX_COUNT) { + count = (words - i) / E1000_EERD_EEWR_MAX_COUNT > 0 ? + E1000_EERD_EEWR_MAX_COUNT : (words - i); + if (hw->nvm.ops.acquire(hw) == E1000_SUCCESS) { + status = e1000_write_nvm_srwr(hw, offset, count, + data + i); + hw->nvm.ops.release(hw); + } else { + status = E1000_ERR_SWFW_SYNC; + } + + if (status != E1000_SUCCESS) + break; + } + + return status; +} + +/** + * e1000_write_nvm_srwr - Write to Shadow Ram using EEWR + * @hw: pointer to the HW structure + * @offset: offset within the Shadow Ram to be written to + * @words: number of words to write + * @data: 16 bit word(s) to be written to the Shadow Ram + * + * Writes data to Shadow Ram at offset using EEWR register. + * + * If e1000_update_nvm_checksum is not called after this function , the + * Shadow Ram will most likely contain an invalid checksum. + **/ +static s32 e1000_write_nvm_srwr(struct e1000_hw *hw, u16 offset, u16 words, + u16 *data) +{ + struct e1000_nvm_info *nvm = &hw->nvm; + u32 i, k, eewr = 0; + u32 attempts = 100000; + s32 ret_val = E1000_SUCCESS; + + DEBUGFUNC("e1000_write_nvm_srwr"); + + /* + * A check for invalid values: offset too large, too many words, + * too many words for the offset, and not enough words. + */ + if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || + (words == 0)) { + DEBUGOUT("nvm parameter(s) out of bounds\n"); + ret_val = -E1000_ERR_NVM; + goto out; + } + + for (i = 0; i < words; i++) { + eewr = ((offset+i) << E1000_NVM_RW_ADDR_SHIFT) | + (data[i] << E1000_NVM_RW_REG_DATA) | + E1000_NVM_RW_REG_START; + + E1000_WRITE_REG(hw, E1000_SRWR, eewr); + + for (k = 0; k < attempts; k++) { + if (E1000_NVM_RW_REG_DONE & + E1000_READ_REG(hw, E1000_SRWR)) { + ret_val = E1000_SUCCESS; + break; + } + usec_delay(5); + } + + if (ret_val != E1000_SUCCESS) { + DEBUGOUT("Shadow RAM write EEWR timed out\n"); + break; + } + } + +out: + return ret_val; +} + +/** e1000_read_invm_word_i210 - Reads OTP + * @hw: pointer to the HW structure + * @address: the word address (aka eeprom offset) to read + * @data: pointer to the data read + * + * Reads 16-bit words from the OTP. Return error when the word is not + * stored in OTP. + **/ +static s32 e1000_read_invm_word_i210(struct e1000_hw *hw, u8 address, u16 *data) +{ + s32 status = -E1000_ERR_INVM_VALUE_NOT_FOUND; + u32 invm_dword; + u16 i; + u8 record_type, word_address; + + DEBUGFUNC("e1000_read_invm_word_i210"); + + for (i = 0; i < E1000_INVM_SIZE; i++) { + invm_dword = E1000_READ_REG(hw, E1000_INVM_DATA_REG(i)); + /* Get record type */ + record_type = INVM_DWORD_TO_RECORD_TYPE(invm_dword); + if (record_type == E1000_INVM_UNINITIALIZED_STRUCTURE) + break; + if (record_type == E1000_INVM_CSR_AUTOLOAD_STRUCTURE) + i += E1000_INVM_CSR_AUTOLOAD_DATA_SIZE_IN_DWORDS; + if (record_type == E1000_INVM_RSA_KEY_SHA256_STRUCTURE) + i += E1000_INVM_RSA_KEY_SHA256_DATA_SIZE_IN_DWORDS; + if (record_type == E1000_INVM_WORD_AUTOLOAD_STRUCTURE) { + word_address = INVM_DWORD_TO_WORD_ADDRESS(invm_dword); + if (word_address == address) { + *data = INVM_DWORD_TO_WORD_DATA(invm_dword); + DEBUGOUT2("Read INVM Word 0x%02x = %x", + address, *data); + status = E1000_SUCCESS; + break; + } + } + } + if (status != E1000_SUCCESS) + DEBUGOUT1("Requested word 0x%02x not found in OTP\n", address); + return status; +} + +/** e1000_read_invm_i210 - Read invm wrapper function for I210/I211 + * @hw: pointer to the HW structure + * @address: the word address (aka eeprom offset) to read + * @data: pointer to the data read + * + * Wrapper function to return data formerly found in the NVM. + **/ +static s32 e1000_read_invm_i210(struct e1000_hw *hw, u16 offset, + u16 E1000_UNUSEDARG words, u16 *data) +{ + s32 ret_val = E1000_SUCCESS; + + DEBUGFUNC("e1000_read_invm_i210"); + + /* Only the MAC addr is required to be present in the iNVM */ + switch (offset) { + case NVM_MAC_ADDR: + ret_val = e1000_read_invm_word_i210(hw, (u8)offset, &data[0]); + ret_val |= e1000_read_invm_word_i210(hw, (u8)offset+1, + &data[1]); + ret_val |= e1000_read_invm_word_i210(hw, (u8)offset+2, + &data[2]); + if (ret_val != E1000_SUCCESS) + DEBUGOUT("MAC Addr not found in iNVM\n"); + break; + case NVM_INIT_CTRL_2: + ret_val = e1000_read_invm_word_i210(hw, (u8)offset, data); + if (ret_val != E1000_SUCCESS) { + *data = NVM_INIT_CTRL_2_DEFAULT_I211; + ret_val = E1000_SUCCESS; + } + break; + case NVM_INIT_CTRL_4: + ret_val = e1000_read_invm_word_i210(hw, (u8)offset, data); + if (ret_val != E1000_SUCCESS) { + *data = NVM_INIT_CTRL_4_DEFAULT_I211; + ret_val = E1000_SUCCESS; + } + break; + case NVM_LED_1_CFG: + ret_val = e1000_read_invm_word_i210(hw, (u8)offset, data); + if (ret_val != E1000_SUCCESS) { + *data = NVM_LED_1_CFG_DEFAULT_I211; + ret_val = E1000_SUCCESS; + } + break; + case NVM_LED_0_2_CFG: + ret_val = e1000_read_invm_word_i210(hw, (u8)offset, data); + if (ret_val != E1000_SUCCESS) { + *data = NVM_LED_0_2_CFG_DEFAULT_I211; + ret_val = E1000_SUCCESS; + } + break; + case NVM_ID_LED_SETTINGS: + ret_val = e1000_read_invm_word_i210(hw, (u8)offset, data); + if (ret_val != E1000_SUCCESS) { + *data = ID_LED_RESERVED_FFFF; + ret_val = E1000_SUCCESS; + } + break; + case NVM_SUB_DEV_ID: + *data = hw->subsystem_device_id; + break; + case NVM_SUB_VEN_ID: + *data = hw->subsystem_vendor_id; + break; + case NVM_DEV_ID: + *data = hw->device_id; + break; + case NVM_VEN_ID: + *data = hw->vendor_id; + break; + default: + DEBUGOUT1("NVM word 0x%02x is not mapped.\n", offset); + *data = NVM_RESERVED_WORD; + break; + } + return ret_val; +} + +/** + * e1000_read_invm_version - Reads iNVM version and image type + * @hw: pointer to the HW structure + * @invm_ver: version structure for the version read + * + * Reads iNVM version and image type. + **/ +s32 e1000_read_invm_version(struct e1000_hw *hw, + struct e1000_fw_version *invm_ver) +{ + u32 *record = NULL; + u32 *next_record = NULL; + u32 i = 0; + u32 invm_dword = 0; + u32 invm_blocks = E1000_INVM_SIZE - (E1000_INVM_ULT_BYTES_SIZE / + E1000_INVM_RECORD_SIZE_IN_BYTES); + u32 buffer[E1000_INVM_SIZE]; + s32 status = -E1000_ERR_INVM_VALUE_NOT_FOUND; + u16 version = 0; + + DEBUGFUNC("e1000_read_invm_version"); + + /* Read iNVM memory */ + for (i = 0; i < E1000_INVM_SIZE; i++) { + invm_dword = E1000_READ_REG(hw, E1000_INVM_DATA_REG(i)); + buffer[i] = invm_dword; + } + + /* Read version number */ + for (i = 1; i < invm_blocks; i++) { + record = &buffer[invm_blocks - i]; + next_record = &buffer[invm_blocks - i + 1]; + + /* Check if we have first version location used */ + if ((i == 1) && ((*record & E1000_INVM_VER_FIELD_ONE) == 0)) { + version = 0; + status = E1000_SUCCESS; + break; + } + /* Check if we have second version location used */ + else if ((i == 1) && + ((*record & E1000_INVM_VER_FIELD_TWO) == 0)) { + version = (*record & E1000_INVM_VER_FIELD_ONE) >> 3; + status = E1000_SUCCESS; + break; + } + /* + * Check if we have odd version location + * used and it is the last one used + */ + else if ((((*record & E1000_INVM_VER_FIELD_ONE) == 0) && + ((*record & 0x3) == 0)) || (((*record & 0x3) != 0) && + (i != 1))) { + version = (*next_record & E1000_INVM_VER_FIELD_TWO) + >> 13; + status = E1000_SUCCESS; + break; + } + /* + * Check if we have even version location + * used and it is the last one used + */ + else if (((*record & E1000_INVM_VER_FIELD_TWO) == 0) && + ((*record & 0x3) == 0)) { + version = (*record & E1000_INVM_VER_FIELD_ONE) >> 3; + status = E1000_SUCCESS; + break; + } + } + + if (status == E1000_SUCCESS) { + invm_ver->invm_major = (version & E1000_INVM_MAJOR_MASK) + >> E1000_INVM_MAJOR_SHIFT; + invm_ver->invm_minor = version & E1000_INVM_MINOR_MASK; + } + /* Read Image Type */ + for (i = 1; i < invm_blocks; i++) { + record = &buffer[invm_blocks - i]; + next_record = &buffer[invm_blocks - i + 1]; + + /* Check if we have image type in first location used */ + if ((i == 1) && ((*record & E1000_INVM_IMGTYPE_FIELD) == 0)) { + invm_ver->invm_img_type = 0; + status = E1000_SUCCESS; + break; + } + /* Check if we have image type in first location used */ + else if ((((*record & 0x3) == 0) && + ((*record & E1000_INVM_IMGTYPE_FIELD) == 0)) || + ((((*record & 0x3) != 0) && (i != 1)))) { + invm_ver->invm_img_type = + (*next_record & E1000_INVM_IMGTYPE_FIELD) >> 23; + status = E1000_SUCCESS; + break; + } + } + return status; +} + +/** + * e1000_validate_nvm_checksum_i210 - Validate EEPROM checksum + * @hw: pointer to the HW structure + * + * Calculates the EEPROM checksum by reading/adding each word of the EEPROM + * and then verifies that the sum of the EEPROM is equal to 0xBABA. + **/ +s32 e1000_validate_nvm_checksum_i210(struct e1000_hw *hw) +{ + s32 status = E1000_SUCCESS; + s32 (*read_op_ptr)(struct e1000_hw *, u16, u16, u16 *); + + DEBUGFUNC("e1000_validate_nvm_checksum_i210"); + + if (hw->nvm.ops.acquire(hw) == E1000_SUCCESS) { + + /* + * Replace the read function with semaphore grabbing with + * the one that skips this for a while. + * We have semaphore taken already here. + */ + read_op_ptr = hw->nvm.ops.read; + hw->nvm.ops.read = e1000_read_nvm_eerd; + + status = e1000_validate_nvm_checksum_generic(hw); + + /* Revert original read operation. */ + hw->nvm.ops.read = read_op_ptr; + + hw->nvm.ops.release(hw); + } else { + status = E1000_ERR_SWFW_SYNC; + } + + return status; +} + + +/** + * e1000_update_nvm_checksum_i210 - Update EEPROM checksum + * @hw: pointer to the HW structure + * + * Updates the EEPROM checksum by reading/adding each word of the EEPROM + * up to the checksum. Then calculates the EEPROM checksum and writes the + * value to the EEPROM. Next commit EEPROM data onto the Flash. + **/ +s32 e1000_update_nvm_checksum_i210(struct e1000_hw *hw) +{ + s32 ret_val = E1000_SUCCESS; + u16 checksum = 0; + u16 i, nvm_data; + + DEBUGFUNC("e1000_update_nvm_checksum_i210"); + + /* + * Read the first word from the EEPROM. If this times out or fails, do + * not continue or we could be in for a very long wait while every + * EEPROM read fails + */ + ret_val = e1000_read_nvm_eerd(hw, 0, 1, &nvm_data); + if (ret_val != E1000_SUCCESS) { + DEBUGOUT("EEPROM read failed\n"); + goto out; + } + + if (hw->nvm.ops.acquire(hw) == E1000_SUCCESS) { + /* + * Do not use hw->nvm.ops.write, hw->nvm.ops.read + * because we do not want to take the synchronization + * semaphores twice here. + */ + + for (i = 0; i < NVM_CHECKSUM_REG; i++) { + ret_val = e1000_read_nvm_eerd(hw, i, 1, &nvm_data); + if (ret_val) { + hw->nvm.ops.release(hw); + DEBUGOUT("NVM Read Error while updating checksum.\n"); + goto out; + } + checksum += nvm_data; + } + checksum = (u16) NVM_SUM - checksum; + ret_val = e1000_write_nvm_srwr(hw, NVM_CHECKSUM_REG, 1, + &checksum); + if (ret_val != E1000_SUCCESS) { + hw->nvm.ops.release(hw); + DEBUGOUT("NVM Write Error while updating checksum.\n"); + goto out; + } + + hw->nvm.ops.release(hw); + + ret_val = e1000_update_flash_i210(hw); + } else { + ret_val = E1000_ERR_SWFW_SYNC; + } +out: + return ret_val; +} + +/** + * e1000_get_flash_presence_i210 - Check if flash device is detected. + * @hw: pointer to the HW structure + * + **/ +bool e1000_get_flash_presence_i210(struct e1000_hw *hw) +{ + u32 eec = 0; + bool ret_val = false; + + DEBUGFUNC("e1000_get_flash_presence_i210"); + + eec = E1000_READ_REG(hw, E1000_EECD); + + if (eec & E1000_EECD_FLASH_DETECTED_I210) + ret_val = true; + + return ret_val; +} + +/** + * e1000_update_flash_i210 - Commit EEPROM to the flash + * @hw: pointer to the HW structure + * + **/ +s32 e1000_update_flash_i210(struct e1000_hw *hw) +{ + s32 ret_val = E1000_SUCCESS; + u32 flup; + + DEBUGFUNC("e1000_update_flash_i210"); + + ret_val = e1000_pool_flash_update_done_i210(hw); + if (ret_val == -E1000_ERR_NVM) { + DEBUGOUT("Flash update time out\n"); + goto out; + } + + flup = E1000_READ_REG(hw, E1000_EECD) | E1000_EECD_FLUPD_I210; + E1000_WRITE_REG(hw, E1000_EECD, flup); + + ret_val = e1000_pool_flash_update_done_i210(hw); + if (ret_val == E1000_SUCCESS) + DEBUGOUT("Flash update complete\n"); + else + DEBUGOUT("Flash update time out\n"); + +out: + return ret_val; +} + +/** + * e1000_pool_flash_update_done_i210 - Pool FLUDONE status. + * @hw: pointer to the HW structure + * + **/ +s32 e1000_pool_flash_update_done_i210(struct e1000_hw *hw) +{ + s32 ret_val = -E1000_ERR_NVM; + u32 i, reg; + + DEBUGFUNC("e1000_pool_flash_update_done_i210"); + + for (i = 0; i < E1000_FLUDONE_ATTEMPTS; i++) { + reg = E1000_READ_REG(hw, E1000_EECD); + if (reg & E1000_EECD_FLUDONE_I210) { + ret_val = E1000_SUCCESS; + break; + } + usec_delay(5); + } + + return ret_val; +} + +/** + * e1000_init_nvm_params_i210 - Initialize i210 NVM function pointers + * @hw: pointer to the HW structure + * + * Initialize the i210/i211 NVM parameters and function pointers. + **/ +static s32 e1000_init_nvm_params_i210(struct e1000_hw *hw) +{ + s32 ret_val = E1000_SUCCESS; + struct e1000_nvm_info *nvm = &hw->nvm; + + DEBUGFUNC("e1000_init_nvm_params_i210"); + + ret_val = e1000_init_nvm_params_82575(hw); + nvm->ops.acquire = e1000_acquire_nvm_i210; + nvm->ops.release = e1000_release_nvm_i210; + nvm->ops.valid_led_default = e1000_valid_led_default_i210; + if (e1000_get_flash_presence_i210(hw)) { + hw->nvm.type = e1000_nvm_flash_hw; + nvm->ops.read = e1000_read_nvm_srrd_i210; + nvm->ops.write = e1000_write_nvm_srwr_i210; + nvm->ops.validate = e1000_validate_nvm_checksum_i210; + nvm->ops.update = e1000_update_nvm_checksum_i210; + } else { + hw->nvm.type = e1000_nvm_invm; + nvm->ops.read = e1000_read_invm_i210; + nvm->ops.write = e1000_null_write_nvm; + nvm->ops.validate = e1000_null_ops_generic; + nvm->ops.update = e1000_null_ops_generic; + } + return ret_val; +} + +/** + * e1000_init_function_pointers_i210 - Init func ptrs. + * @hw: pointer to the HW structure + * + * Called to initialize all function pointers and parameters. + **/ +void e1000_init_function_pointers_i210(struct e1000_hw *hw) +{ + e1000_init_function_pointers_82575(hw); + hw->nvm.ops.init_params = e1000_init_nvm_params_i210; + + return; +} + +/** + * e1000_valid_led_default_i210 - Verify a valid default LED config + * @hw: pointer to the HW structure + * @data: pointer to the NVM (EEPROM) + * + * Read the EEPROM for the current default LED configuration. If the + * LED configuration is not valid, set to a valid LED configuration. + **/ +static s32 e1000_valid_led_default_i210(struct e1000_hw *hw, u16 *data) +{ + s32 ret_val; + + DEBUGFUNC("e1000_valid_led_default_i210"); + + ret_val = hw->nvm.ops.read(hw, NVM_ID_LED_SETTINGS, 1, data); + if (ret_val) { + DEBUGOUT("NVM Read Error\n"); + goto out; + } + + if (*data == ID_LED_RESERVED_0000 || *data == ID_LED_RESERVED_FFFF) { + switch (hw->phy.media_type) { + case e1000_media_type_internal_serdes: + *data = ID_LED_DEFAULT_I210_SERDES; + break; + case e1000_media_type_copper: + default: + *data = ID_LED_DEFAULT_I210; + break; + } + } +out: + return ret_val; +} + +/** + * __e1000_access_xmdio_reg - Read/write XMDIO register + * @hw: pointer to the HW structure + * @address: XMDIO address to program + * @dev_addr: device address to program + * @data: pointer to value to read/write from/to the XMDIO address + * @read: boolean flag to indicate read or write + **/ +static s32 __e1000_access_xmdio_reg(struct e1000_hw *hw, u16 address, + u8 dev_addr, u16 *data, bool read) +{ + s32 ret_val = E1000_SUCCESS; + + DEBUGFUNC("__e1000_access_xmdio_reg"); + + ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAC, dev_addr); + if (ret_val) + return ret_val; + + ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAAD, address); + if (ret_val) + return ret_val; + + ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAC, E1000_MMDAC_FUNC_DATA | + dev_addr); + if (ret_val) + return ret_val; + + if (read) + ret_val = hw->phy.ops.read_reg(hw, E1000_MMDAAD, data); + else + ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAAD, *data); + if (ret_val) + return ret_val; + + /* Recalibrate the device back to 0 */ + ret_val = hw->phy.ops.write_reg(hw, E1000_MMDAC, 0); + if (ret_val) + return ret_val; + + return ret_val; +} + +/** + * e1000_read_xmdio_reg - Read XMDIO register + * @hw: pointer to the HW structure + * @addr: XMDIO address to program + * @dev_addr: device address to program + * @data: value to be read from the EMI address + **/ +s32 e1000_read_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, u16 *data) +{ + DEBUGFUNC("e1000_read_xmdio_reg"); + + return __e1000_access_xmdio_reg(hw, addr, dev_addr, data, true); +} + +/** + * e1000_write_xmdio_reg - Write XMDIO register + * @hw: pointer to the HW structure + * @addr: XMDIO address to program + * @dev_addr: device address to program + * @data: value to be written to the XMDIO address + **/ +s32 e1000_write_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, u16 data) +{ + DEBUGFUNC("e1000_read_xmdio_reg"); + + return __e1000_access_xmdio_reg(hw, addr, dev_addr, &data, false); +} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.h new file mode 100644 index 00000000..57b2eb56 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_i210.h @@ -0,0 +1,91 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _E1000_I210_H_ +#define _E1000_I210_H_ + +bool e1000_get_flash_presence_i210(struct e1000_hw *hw); +s32 e1000_update_flash_i210(struct e1000_hw *hw); +s32 e1000_update_nvm_checksum_i210(struct e1000_hw *hw); +s32 e1000_validate_nvm_checksum_i210(struct e1000_hw *hw); +s32 e1000_write_nvm_srwr_i210(struct e1000_hw *hw, u16 offset, + u16 words, u16 *data); +s32 e1000_read_nvm_srrd_i210(struct e1000_hw *hw, u16 offset, + u16 words, u16 *data); +s32 e1000_read_invm_version(struct e1000_hw *hw, + struct e1000_fw_version *invm_ver); +s32 e1000_acquire_swfw_sync_i210(struct e1000_hw *hw, u16 mask); +void e1000_release_swfw_sync_i210(struct e1000_hw *hw, u16 mask); +s32 e1000_read_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, + u16 *data); +s32 e1000_write_xmdio_reg(struct e1000_hw *hw, u16 addr, u8 dev_addr, + u16 data); + +#define E1000_STM_OPCODE 0xDB00 +#define E1000_EEPROM_FLASH_SIZE_WORD 0x11 + +#define INVM_DWORD_TO_RECORD_TYPE(invm_dword) \ + (u8)((invm_dword) & 0x7) +#define INVM_DWORD_TO_WORD_ADDRESS(invm_dword) \ + (u8)(((invm_dword) & 0x0000FE00) >> 9) +#define INVM_DWORD_TO_WORD_DATA(invm_dword) \ + (u16)(((invm_dword) & 0xFFFF0000) >> 16) + +enum E1000_INVM_STRUCTURE_TYPE { + E1000_INVM_UNINITIALIZED_STRUCTURE = 0x00, + E1000_INVM_WORD_AUTOLOAD_STRUCTURE = 0x01, + E1000_INVM_CSR_AUTOLOAD_STRUCTURE = 0x02, + E1000_INVM_PHY_REGISTER_AUTOLOAD_STRUCTURE = 0x03, + E1000_INVM_RSA_KEY_SHA256_STRUCTURE = 0x04, + E1000_INVM_INVALIDATED_STRUCTURE = 0x0F, +}; + +#define E1000_INVM_RSA_KEY_SHA256_DATA_SIZE_IN_DWORDS 8 +#define E1000_INVM_CSR_AUTOLOAD_DATA_SIZE_IN_DWORDS 1 +#define E1000_INVM_ULT_BYTES_SIZE 8 +#define E1000_INVM_RECORD_SIZE_IN_BYTES 4 +#define E1000_INVM_VER_FIELD_ONE 0x1FF8 +#define E1000_INVM_VER_FIELD_TWO 0x7FE000 +#define E1000_INVM_IMGTYPE_FIELD 0x1F800000 + +#define E1000_INVM_MAJOR_MASK 0x3F0 +#define E1000_INVM_MINOR_MASK 0xF +#define E1000_INVM_MAJOR_SHIFT 4 + +#define ID_LED_DEFAULT_I210 ((ID_LED_OFF1_ON2 << 8) | \ + (ID_LED_DEF1_DEF2 << 4) | \ + (ID_LED_OFF1_OFF2)) +#define ID_LED_DEFAULT_I210_SERDES ((ID_LED_DEF1_DEF2 << 8) | \ + (ID_LED_DEF1_DEF2 << 4) | \ + (ID_LED_OFF1_ON2)) + +/* NVM offset defaults for I211 devices */ +#define NVM_INIT_CTRL_2_DEFAULT_I211 0X7243 +#define NVM_INIT_CTRL_4_DEFAULT_I211 0x00C1 +#define NVM_LED_1_CFG_DEFAULT_I211 0x0184 +#define NVM_LED_0_2_CFG_DEFAULT_I211 0x200C +#endif diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.c new file mode 100644 index 00000000..4ee59ba9 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.c @@ -0,0 +1,2096 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#include "e1000_api.h" + +static s32 e1000_validate_mdi_setting_generic(struct e1000_hw *hw); +static void e1000_set_lan_id_multi_port_pcie(struct e1000_hw *hw); +static void e1000_config_collision_dist_generic(struct e1000_hw *hw); +static void e1000_rar_set_generic(struct e1000_hw *hw, u8 *addr, u32 index); + +/** + * e1000_init_mac_ops_generic - Initialize MAC function pointers + * @hw: pointer to the HW structure + * + * Setups up the function pointers to no-op functions + **/ +void e1000_init_mac_ops_generic(struct e1000_hw *hw) +{ + struct e1000_mac_info *mac = &hw->mac; + DEBUGFUNC("e1000_init_mac_ops_generic"); + + /* General Setup */ + mac->ops.init_params = e1000_null_ops_generic; + mac->ops.init_hw = e1000_null_ops_generic; + mac->ops.reset_hw = e1000_null_ops_generic; + mac->ops.setup_physical_interface = e1000_null_ops_generic; + mac->ops.get_bus_info = e1000_null_ops_generic; + mac->ops.set_lan_id = e1000_set_lan_id_multi_port_pcie; + mac->ops.read_mac_addr = e1000_read_mac_addr_generic; + mac->ops.config_collision_dist = e1000_config_collision_dist_generic; + mac->ops.clear_hw_cntrs = e1000_null_mac_generic; + /* LED */ + mac->ops.cleanup_led = e1000_null_ops_generic; + mac->ops.setup_led = e1000_null_ops_generic; + mac->ops.blink_led = e1000_null_ops_generic; + mac->ops.led_on = e1000_null_ops_generic; + mac->ops.led_off = e1000_null_ops_generic; + /* LINK */ + mac->ops.setup_link = e1000_null_ops_generic; + mac->ops.get_link_up_info = e1000_null_link_info; + mac->ops.check_for_link = e1000_null_ops_generic; + /* Management */ + mac->ops.check_mng_mode = e1000_null_mng_mode; + /* VLAN, MC, etc. */ + mac->ops.update_mc_addr_list = e1000_null_update_mc; + mac->ops.clear_vfta = e1000_null_mac_generic; + mac->ops.write_vfta = e1000_null_write_vfta; + mac->ops.rar_set = e1000_rar_set_generic; + mac->ops.validate_mdi_setting = e1000_validate_mdi_setting_generic; +} + +/** + * e1000_null_ops_generic - No-op function, returns 0 + * @hw: pointer to the HW structure + **/ +s32 e1000_null_ops_generic(struct e1000_hw E1000_UNUSEDARG *hw) +{ + DEBUGFUNC("e1000_null_ops_generic"); + return E1000_SUCCESS; +} + +/** + * e1000_null_mac_generic - No-op function, return void + * @hw: pointer to the HW structure + **/ +void e1000_null_mac_generic(struct e1000_hw E1000_UNUSEDARG *hw) +{ + DEBUGFUNC("e1000_null_mac_generic"); + return; +} + +/** + * e1000_null_link_info - No-op function, return 0 + * @hw: pointer to the HW structure + **/ +s32 e1000_null_link_info(struct e1000_hw E1000_UNUSEDARG *hw, + u16 E1000_UNUSEDARG *s, u16 E1000_UNUSEDARG *d) +{ + DEBUGFUNC("e1000_null_link_info"); + return E1000_SUCCESS; +} + +/** + * e1000_null_mng_mode - No-op function, return false + * @hw: pointer to the HW structure + **/ +bool e1000_null_mng_mode(struct e1000_hw E1000_UNUSEDARG *hw) +{ + DEBUGFUNC("e1000_null_mng_mode"); + return false; +} + +/** + * e1000_null_update_mc - No-op function, return void + * @hw: pointer to the HW structure + **/ +void e1000_null_update_mc(struct e1000_hw E1000_UNUSEDARG *hw, + u8 E1000_UNUSEDARG *h, u32 E1000_UNUSEDARG a) +{ + DEBUGFUNC("e1000_null_update_mc"); + return; +} + +/** + * e1000_null_write_vfta - No-op function, return void + * @hw: pointer to the HW structure + **/ +void e1000_null_write_vfta(struct e1000_hw E1000_UNUSEDARG *hw, + u32 E1000_UNUSEDARG a, u32 E1000_UNUSEDARG b) +{ + DEBUGFUNC("e1000_null_write_vfta"); + return; +} + +/** + * e1000_null_rar_set - No-op function, return void + * @hw: pointer to the HW structure + **/ +void e1000_null_rar_set(struct e1000_hw E1000_UNUSEDARG *hw, + u8 E1000_UNUSEDARG *h, u32 E1000_UNUSEDARG a) +{ + DEBUGFUNC("e1000_null_rar_set"); + return; +} + +/** + * e1000_get_bus_info_pcie_generic - Get PCIe bus information + * @hw: pointer to the HW structure + * + * Determines and stores the system bus information for a particular + * network interface. The following bus information is determined and stored: + * bus speed, bus width, type (PCIe), and PCIe function. + **/ +s32 e1000_get_bus_info_pcie_generic(struct e1000_hw *hw) +{ + struct e1000_mac_info *mac = &hw->mac; + struct e1000_bus_info *bus = &hw->bus; + s32 ret_val; + u16 pcie_link_status; + + DEBUGFUNC("e1000_get_bus_info_pcie_generic"); + + bus->type = e1000_bus_type_pci_express; + + ret_val = e1000_read_pcie_cap_reg(hw, PCIE_LINK_STATUS, + &pcie_link_status); + if (ret_val) { + bus->width = e1000_bus_width_unknown; + bus->speed = e1000_bus_speed_unknown; + } else { + switch (pcie_link_status & PCIE_LINK_SPEED_MASK) { + case PCIE_LINK_SPEED_2500: + bus->speed = e1000_bus_speed_2500; + break; + case PCIE_LINK_SPEED_5000: + bus->speed = e1000_bus_speed_5000; + break; + default: + bus->speed = e1000_bus_speed_unknown; + break; + } + + bus->width = (enum e1000_bus_width)((pcie_link_status & + PCIE_LINK_WIDTH_MASK) >> PCIE_LINK_WIDTH_SHIFT); + } + + mac->ops.set_lan_id(hw); + + return E1000_SUCCESS; +} + +/** + * e1000_set_lan_id_multi_port_pcie - Set LAN id for PCIe multiple port devices + * + * @hw: pointer to the HW structure + * + * Determines the LAN function id by reading memory-mapped registers + * and swaps the port value if requested. + **/ +static void e1000_set_lan_id_multi_port_pcie(struct e1000_hw *hw) +{ + struct e1000_bus_info *bus = &hw->bus; + u32 reg; + + /* The status register reports the correct function number + * for the device regardless of function swap state. + */ + reg = E1000_READ_REG(hw, E1000_STATUS); + bus->func = (reg & E1000_STATUS_FUNC_MASK) >> E1000_STATUS_FUNC_SHIFT; +} + +/** + * e1000_set_lan_id_single_port - Set LAN id for a single port device + * @hw: pointer to the HW structure + * + * Sets the LAN function id to zero for a single port device. + **/ +void e1000_set_lan_id_single_port(struct e1000_hw *hw) +{ + struct e1000_bus_info *bus = &hw->bus; + + bus->func = 0; +} + +/** + * e1000_clear_vfta_generic - Clear VLAN filter table + * @hw: pointer to the HW structure + * + * Clears the register array which contains the VLAN filter table by + * setting all the values to 0. + **/ +void e1000_clear_vfta_generic(struct e1000_hw *hw) +{ + u32 offset; + + DEBUGFUNC("e1000_clear_vfta_generic"); + + for (offset = 0; offset < E1000_VLAN_FILTER_TBL_SIZE; offset++) { + E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, 0); + E1000_WRITE_FLUSH(hw); + } +} + +/** + * e1000_write_vfta_generic - Write value to VLAN filter table + * @hw: pointer to the HW structure + * @offset: register offset in VLAN filter table + * @value: register value written to VLAN filter table + * + * Writes value at the given offset in the register array which stores + * the VLAN filter table. + **/ +void e1000_write_vfta_generic(struct e1000_hw *hw, u32 offset, u32 value) +{ + DEBUGFUNC("e1000_write_vfta_generic"); + + E1000_WRITE_REG_ARRAY(hw, E1000_VFTA, offset, value); + E1000_WRITE_FLUSH(hw); +} + +/** + * e1000_init_rx_addrs_generic - Initialize receive address's + * @hw: pointer to the HW structure + * @rar_count: receive address registers + * + * Setup the receive address registers by setting the base receive address + * register to the devices MAC address and clearing all the other receive + * address registers to 0. + **/ +void e1000_init_rx_addrs_generic(struct e1000_hw *hw, u16 rar_count) +{ + u32 i; + u8 mac_addr[ETH_ADDR_LEN] = {0}; + + DEBUGFUNC("e1000_init_rx_addrs_generic"); + + /* Setup the receive address */ + DEBUGOUT("Programming MAC Address into RAR[0]\n"); + + hw->mac.ops.rar_set(hw, hw->mac.addr, 0); + + /* Zero out the other (rar_entry_count - 1) receive addresses */ + DEBUGOUT1("Clearing RAR[1-%u]\n", rar_count-1); + for (i = 1; i < rar_count; i++) + hw->mac.ops.rar_set(hw, mac_addr, i); +} + +/** + * e1000_check_alt_mac_addr_generic - Check for alternate MAC addr + * @hw: pointer to the HW structure + * + * Checks the nvm for an alternate MAC address. An alternate MAC address + * can be setup by pre-boot software and must be treated like a permanent + * address and must override the actual permanent MAC address. If an + * alternate MAC address is found it is programmed into RAR0, replacing + * the permanent address that was installed into RAR0 by the Si on reset. + * This function will return SUCCESS unless it encounters an error while + * reading the EEPROM. + **/ +s32 e1000_check_alt_mac_addr_generic(struct e1000_hw *hw) +{ + u32 i; + s32 ret_val; + u16 offset, nvm_alt_mac_addr_offset, nvm_data; + u8 alt_mac_addr[ETH_ADDR_LEN]; + + DEBUGFUNC("e1000_check_alt_mac_addr_generic"); + + ret_val = hw->nvm.ops.read(hw, NVM_COMPAT, 1, &nvm_data); + if (ret_val) + return ret_val; + + + /* Alternate MAC address is handled by the option ROM for 82580 + * and newer. SW support not required. + */ + if (hw->mac.type >= e1000_82580) + return E1000_SUCCESS; + + ret_val = hw->nvm.ops.read(hw, NVM_ALT_MAC_ADDR_PTR, 1, + &nvm_alt_mac_addr_offset); + if (ret_val) { + DEBUGOUT("NVM Read Error\n"); + return ret_val; + } + + if ((nvm_alt_mac_addr_offset == 0xFFFF) || + (nvm_alt_mac_addr_offset == 0x0000)) + /* There is no Alternate MAC Address */ + return E1000_SUCCESS; + + if (hw->bus.func == E1000_FUNC_1) + nvm_alt_mac_addr_offset += E1000_ALT_MAC_ADDRESS_OFFSET_LAN1; + if (hw->bus.func == E1000_FUNC_2) + nvm_alt_mac_addr_offset += E1000_ALT_MAC_ADDRESS_OFFSET_LAN2; + + if (hw->bus.func == E1000_FUNC_3) + nvm_alt_mac_addr_offset += E1000_ALT_MAC_ADDRESS_OFFSET_LAN3; + for (i = 0; i < ETH_ADDR_LEN; i += 2) { + offset = nvm_alt_mac_addr_offset + (i >> 1); + ret_val = hw->nvm.ops.read(hw, offset, 1, &nvm_data); + if (ret_val) { + DEBUGOUT("NVM Read Error\n"); + return ret_val; + } + + alt_mac_addr[i] = (u8)(nvm_data & 0xFF); + alt_mac_addr[i + 1] = (u8)(nvm_data >> 8); + } + + /* if multicast bit is set, the alternate address will not be used */ + if (alt_mac_addr[0] & 0x01) { + DEBUGOUT("Ignoring Alternate Mac Address with MC bit set\n"); + return E1000_SUCCESS; + } + + /* We have a valid alternate MAC address, and we want to treat it the + * same as the normal permanent MAC address stored by the HW into the + * RAR. Do this by mapping this address into RAR0. + */ + hw->mac.ops.rar_set(hw, alt_mac_addr, 0); + + return E1000_SUCCESS; +} + +/** + * e1000_rar_set_generic - Set receive address register + * @hw: pointer to the HW structure + * @addr: pointer to the receive address + * @index: receive address array register + * + * Sets the receive address array register at index to the address passed + * in by addr. + **/ +static void e1000_rar_set_generic(struct e1000_hw *hw, u8 *addr, u32 index) +{ + u32 rar_low, rar_high; + + DEBUGFUNC("e1000_rar_set_generic"); + + /* HW expects these in little endian so we reverse the byte order + * from network order (big endian) to little endian + */ + rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) | + ((u32) addr[2] << 16) | ((u32) addr[3] << 24)); + + rar_high = ((u32) addr[4] | ((u32) addr[5] << 8)); + + /* If MAC address zero, no need to set the AV bit */ + if (rar_low || rar_high) + rar_high |= E1000_RAH_AV; + + /* Some bridges will combine consecutive 32-bit writes into + * a single burst write, which will malfunction on some parts. + * The flushes avoid this. + */ + E1000_WRITE_REG(hw, E1000_RAL(index), rar_low); + E1000_WRITE_FLUSH(hw); + E1000_WRITE_REG(hw, E1000_RAH(index), rar_high); + E1000_WRITE_FLUSH(hw); +} + +/** + * e1000_hash_mc_addr_generic - Generate a multicast hash value + * @hw: pointer to the HW structure + * @mc_addr: pointer to a multicast address + * + * Generates a multicast address hash value which is used to determine + * the multicast filter table array address and new table value. + **/ +u32 e1000_hash_mc_addr_generic(struct e1000_hw *hw, u8 *mc_addr) +{ + u32 hash_value, hash_mask; + u8 bit_shift = 0; + + DEBUGFUNC("e1000_hash_mc_addr_generic"); + + /* Register count multiplied by bits per register */ + hash_mask = (hw->mac.mta_reg_count * 32) - 1; + + /* For a mc_filter_type of 0, bit_shift is the number of left-shifts + * where 0xFF would still fall within the hash mask. + */ + while (hash_mask >> bit_shift != 0xFF) + bit_shift++; + + /* The portion of the address that is used for the hash table + * is determined by the mc_filter_type setting. + * The algorithm is such that there is a total of 8 bits of shifting. + * The bit_shift for a mc_filter_type of 0 represents the number of + * left-shifts where the MSB of mc_addr[5] would still fall within + * the hash_mask. Case 0 does this exactly. Since there are a total + * of 8 bits of shifting, then mc_addr[4] will shift right the + * remaining number of bits. Thus 8 - bit_shift. The rest of the + * cases are a variation of this algorithm...essentially raising the + * number of bits to shift mc_addr[5] left, while still keeping the + * 8-bit shifting total. + * + * For example, given the following Destination MAC Address and an + * mta register count of 128 (thus a 4096-bit vector and 0xFFF mask), + * we can see that the bit_shift for case 0 is 4. These are the hash + * values resulting from each mc_filter_type... + * [0] [1] [2] [3] [4] [5] + * 01 AA 00 12 34 56 + * LSB MSB + * + * case 0: hash_value = ((0x34 >> 4) | (0x56 << 4)) & 0xFFF = 0x563 + * case 1: hash_value = ((0x34 >> 3) | (0x56 << 5)) & 0xFFF = 0xAC6 + * case 2: hash_value = ((0x34 >> 2) | (0x56 << 6)) & 0xFFF = 0x163 + * case 3: hash_value = ((0x34 >> 0) | (0x56 << 8)) & 0xFFF = 0x634 + */ + switch (hw->mac.mc_filter_type) { + default: + case 0: + break; + case 1: + bit_shift += 1; + break; + case 2: + bit_shift += 2; + break; + case 3: + bit_shift += 4; + break; + } + + hash_value = hash_mask & (((mc_addr[4] >> (8 - bit_shift)) | + (((u16) mc_addr[5]) << bit_shift))); + + return hash_value; +} + +/** + * e1000_update_mc_addr_list_generic - Update Multicast addresses + * @hw: pointer to the HW structure + * @mc_addr_list: array of multicast addresses to program + * @mc_addr_count: number of multicast addresses to program + * + * Updates entire Multicast Table Array. + * The caller must have a packed mc_addr_list of multicast addresses. + **/ +void e1000_update_mc_addr_list_generic(struct e1000_hw *hw, + u8 *mc_addr_list, u32 mc_addr_count) +{ + u32 hash_value, hash_bit, hash_reg; + int i; + + DEBUGFUNC("e1000_update_mc_addr_list_generic"); + + /* clear mta_shadow */ + memset(&hw->mac.mta_shadow, 0, sizeof(hw->mac.mta_shadow)); + + /* update mta_shadow from mc_addr_list */ + for (i = 0; (u32) i < mc_addr_count; i++) { + hash_value = e1000_hash_mc_addr_generic(hw, mc_addr_list); + + hash_reg = (hash_value >> 5) & (hw->mac.mta_reg_count - 1); + hash_bit = hash_value & 0x1F; + + hw->mac.mta_shadow[hash_reg] |= (1 << hash_bit); + mc_addr_list += (ETH_ADDR_LEN); + } + + /* replace the entire MTA table */ + for (i = hw->mac.mta_reg_count - 1; i >= 0; i--) + E1000_WRITE_REG_ARRAY(hw, E1000_MTA, i, hw->mac.mta_shadow[i]); + E1000_WRITE_FLUSH(hw); +} + +/** + * e1000_clear_hw_cntrs_base_generic - Clear base hardware counters + * @hw: pointer to the HW structure + * + * Clears the base hardware counters by reading the counter registers. + **/ +void e1000_clear_hw_cntrs_base_generic(struct e1000_hw *hw) +{ + DEBUGFUNC("e1000_clear_hw_cntrs_base_generic"); + + E1000_READ_REG(hw, E1000_CRCERRS); + E1000_READ_REG(hw, E1000_SYMERRS); + E1000_READ_REG(hw, E1000_MPC); + E1000_READ_REG(hw, E1000_SCC); + E1000_READ_REG(hw, E1000_ECOL); + E1000_READ_REG(hw, E1000_MCC); + E1000_READ_REG(hw, E1000_LATECOL); + E1000_READ_REG(hw, E1000_COLC); + E1000_READ_REG(hw, E1000_DC); + E1000_READ_REG(hw, E1000_SEC); + E1000_READ_REG(hw, E1000_RLEC); + E1000_READ_REG(hw, E1000_XONRXC); + E1000_READ_REG(hw, E1000_XONTXC); + E1000_READ_REG(hw, E1000_XOFFRXC); + E1000_READ_REG(hw, E1000_XOFFTXC); + E1000_READ_REG(hw, E1000_FCRUC); + E1000_READ_REG(hw, E1000_GPRC); + E1000_READ_REG(hw, E1000_BPRC); + E1000_READ_REG(hw, E1000_MPRC); + E1000_READ_REG(hw, E1000_GPTC); + E1000_READ_REG(hw, E1000_GORCL); + E1000_READ_REG(hw, E1000_GORCH); + E1000_READ_REG(hw, E1000_GOTCL); + E1000_READ_REG(hw, E1000_GOTCH); + E1000_READ_REG(hw, E1000_RNBC); + E1000_READ_REG(hw, E1000_RUC); + E1000_READ_REG(hw, E1000_RFC); + E1000_READ_REG(hw, E1000_ROC); + E1000_READ_REG(hw, E1000_RJC); + E1000_READ_REG(hw, E1000_TORL); + E1000_READ_REG(hw, E1000_TORH); + E1000_READ_REG(hw, E1000_TOTL); + E1000_READ_REG(hw, E1000_TOTH); + E1000_READ_REG(hw, E1000_TPR); + E1000_READ_REG(hw, E1000_TPT); + E1000_READ_REG(hw, E1000_MPTC); + E1000_READ_REG(hw, E1000_BPTC); +} + +/** + * e1000_check_for_copper_link_generic - Check for link (Copper) + * @hw: pointer to the HW structure + * + * Checks to see of the link status of the hardware has changed. If a + * change in link status has been detected, then we read the PHY registers + * to get the current speed/duplex if link exists. + **/ +s32 e1000_check_for_copper_link_generic(struct e1000_hw *hw) +{ + struct e1000_mac_info *mac = &hw->mac; + s32 ret_val; + bool link; + + DEBUGFUNC("e1000_check_for_copper_link"); + + /* We only want to go out to the PHY registers to see if Auto-Neg + * has completed and/or if our link status has changed. The + * get_link_status flag is set upon receiving a Link Status + * Change or Rx Sequence Error interrupt. + */ + if (!mac->get_link_status) + return E1000_SUCCESS; + + /* First we want to see if the MII Status Register reports + * link. If so, then we want to get the current speed/duplex + * of the PHY. + */ + ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link); + if (ret_val) + return ret_val; + + if (!link) + return E1000_SUCCESS; /* No link detected */ + + mac->get_link_status = false; + + /* Check if there was DownShift, must be checked + * immediately after link-up + */ + e1000_check_downshift_generic(hw); + + /* If we are forcing speed/duplex, then we simply return since + * we have already determined whether we have link or not. + */ + if (!mac->autoneg) + return -E1000_ERR_CONFIG; + + /* Auto-Neg is enabled. Auto Speed Detection takes care + * of MAC speed/duplex configuration. So we only need to + * configure Collision Distance in the MAC. + */ + mac->ops.config_collision_dist(hw); + + /* Configure Flow Control now that Auto-Neg has completed. + * First, we need to restore the desired flow control + * settings because we may have had to re-autoneg with a + * different link partner. + */ + ret_val = e1000_config_fc_after_link_up_generic(hw); + if (ret_val) + DEBUGOUT("Error configuring flow control\n"); + + return ret_val; +} + +/** + * e1000_check_for_fiber_link_generic - Check for link (Fiber) + * @hw: pointer to the HW structure + * + * Checks for link up on the hardware. If link is not up and we have + * a signal, then we need to force link up. + **/ +s32 e1000_check_for_fiber_link_generic(struct e1000_hw *hw) +{ + struct e1000_mac_info *mac = &hw->mac; + u32 rxcw; + u32 ctrl; + u32 status; + s32 ret_val; + + DEBUGFUNC("e1000_check_for_fiber_link_generic"); + + ctrl = E1000_READ_REG(hw, E1000_CTRL); + status = E1000_READ_REG(hw, E1000_STATUS); + rxcw = E1000_READ_REG(hw, E1000_RXCW); + + /* If we don't have link (auto-negotiation failed or link partner + * cannot auto-negotiate), the cable is plugged in (we have signal), + * and our link partner is not trying to auto-negotiate with us (we + * are receiving idles or data), we need to force link up. We also + * need to give auto-negotiation time to complete, in case the cable + * was just plugged in. The autoneg_failed flag does this. + */ + /* (ctrl & E1000_CTRL_SWDPIN1) == 1 == have signal */ + if ((ctrl & E1000_CTRL_SWDPIN1) && !(status & E1000_STATUS_LU) && + !(rxcw & E1000_RXCW_C)) { + if (!mac->autoneg_failed) { + mac->autoneg_failed = true; + return E1000_SUCCESS; + } + DEBUGOUT("NOT Rx'ing /C/, disable AutoNeg and force link.\n"); + + /* Disable auto-negotiation in the TXCW register */ + E1000_WRITE_REG(hw, E1000_TXCW, (mac->txcw & ~E1000_TXCW_ANE)); + + /* Force link-up and also force full-duplex. */ + ctrl = E1000_READ_REG(hw, E1000_CTRL); + ctrl |= (E1000_CTRL_SLU | E1000_CTRL_FD); + E1000_WRITE_REG(hw, E1000_CTRL, ctrl); + + /* Configure Flow Control after forcing link up. */ + ret_val = e1000_config_fc_after_link_up_generic(hw); + if (ret_val) { + DEBUGOUT("Error configuring flow control\n"); + return ret_val; + } + } else if ((ctrl & E1000_CTRL_SLU) && (rxcw & E1000_RXCW_C)) { + /* If we are forcing link and we are receiving /C/ ordered + * sets, re-enable auto-negotiation in the TXCW register + * and disable forced link in the Device Control register + * in an attempt to auto-negotiate with our link partner. + */ + DEBUGOUT("Rx'ing /C/, enable AutoNeg and stop forcing link.\n"); + E1000_WRITE_REG(hw, E1000_TXCW, mac->txcw); + E1000_WRITE_REG(hw, E1000_CTRL, (ctrl & ~E1000_CTRL_SLU)); + + mac->serdes_has_link = true; + } + + return E1000_SUCCESS; +} + +/** + * e1000_check_for_serdes_link_generic - Check for link (Serdes) + * @hw: pointer to the HW structure + * + * Checks for link up on the hardware. If link is not up and we have + * a signal, then we need to force link up. + **/ +s32 e1000_check_for_serdes_link_generic(struct e1000_hw *hw) +{ + struct e1000_mac_info *mac = &hw->mac; + u32 rxcw; + u32 ctrl; + u32 status; + s32 ret_val; + + DEBUGFUNC("e1000_check_for_serdes_link_generic"); + + ctrl = E1000_READ_REG(hw, E1000_CTRL); + status = E1000_READ_REG(hw, E1000_STATUS); + rxcw = E1000_READ_REG(hw, E1000_RXCW); + + /* If we don't have link (auto-negotiation failed or link partner + * cannot auto-negotiate), and our link partner is not trying to + * auto-negotiate with us (we are receiving idles or data), + * we need to force link up. We also need to give auto-negotiation + * time to complete. + */ + /* (ctrl & E1000_CTRL_SWDPIN1) == 1 == have signal */ + if (!(status & E1000_STATUS_LU) && !(rxcw & E1000_RXCW_C)) { + if (!mac->autoneg_failed) { + mac->autoneg_failed = true; + return E1000_SUCCESS; + } + DEBUGOUT("NOT Rx'ing /C/, disable AutoNeg and force link.\n"); + + /* Disable auto-negotiation in the TXCW register */ + E1000_WRITE_REG(hw, E1000_TXCW, (mac->txcw & ~E1000_TXCW_ANE)); + + /* Force link-up and also force full-duplex. */ + ctrl = E1000_READ_REG(hw, E1000_CTRL); + ctrl |= (E1000_CTRL_SLU | E1000_CTRL_FD); + E1000_WRITE_REG(hw, E1000_CTRL, ctrl); + + /* Configure Flow Control after forcing link up. */ + ret_val = e1000_config_fc_after_link_up_generic(hw); + if (ret_val) { + DEBUGOUT("Error configuring flow control\n"); + return ret_val; + } + } else if ((ctrl & E1000_CTRL_SLU) && (rxcw & E1000_RXCW_C)) { + /* If we are forcing link and we are receiving /C/ ordered + * sets, re-enable auto-negotiation in the TXCW register + * and disable forced link in the Device Control register + * in an attempt to auto-negotiate with our link partner. + */ + DEBUGOUT("Rx'ing /C/, enable AutoNeg and stop forcing link.\n"); + E1000_WRITE_REG(hw, E1000_TXCW, mac->txcw); + E1000_WRITE_REG(hw, E1000_CTRL, (ctrl & ~E1000_CTRL_SLU)); + + mac->serdes_has_link = true; + } else if (!(E1000_TXCW_ANE & E1000_READ_REG(hw, E1000_TXCW))) { + /* If we force link for non-auto-negotiation switch, check + * link status based on MAC synchronization for internal + * serdes media type. + */ + /* SYNCH bit and IV bit are sticky. */ + usec_delay(10); + rxcw = E1000_READ_REG(hw, E1000_RXCW); + if (rxcw & E1000_RXCW_SYNCH) { + if (!(rxcw & E1000_RXCW_IV)) { + mac->serdes_has_link = true; + DEBUGOUT("SERDES: Link up - forced.\n"); + } + } else { + mac->serdes_has_link = false; + DEBUGOUT("SERDES: Link down - force failed.\n"); + } + } + + if (E1000_TXCW_ANE & E1000_READ_REG(hw, E1000_TXCW)) { + status = E1000_READ_REG(hw, E1000_STATUS); + if (status & E1000_STATUS_LU) { + /* SYNCH bit and IV bit are sticky, so reread rxcw. */ + usec_delay(10); + rxcw = E1000_READ_REG(hw, E1000_RXCW); + if (rxcw & E1000_RXCW_SYNCH) { + if (!(rxcw & E1000_RXCW_IV)) { + mac->serdes_has_link = true; + DEBUGOUT("SERDES: Link up - autoneg completed successfully.\n"); + } else { + mac->serdes_has_link = false; + DEBUGOUT("SERDES: Link down - invalid codewords detected in autoneg.\n"); + } + } else { + mac->serdes_has_link = false; + DEBUGOUT("SERDES: Link down - no sync.\n"); + } + } else { + mac->serdes_has_link = false; + DEBUGOUT("SERDES: Link down - autoneg failed\n"); + } + } + + return E1000_SUCCESS; +} + +/** + * e1000_set_default_fc_generic - Set flow control default values + * @hw: pointer to the HW structure + * + * Read the EEPROM for the default values for flow control and store the + * values. + **/ +static s32 e1000_set_default_fc_generic(struct e1000_hw *hw) +{ + s32 ret_val; + u16 nvm_data; + + DEBUGFUNC("e1000_set_default_fc_generic"); + + /* Read and store word 0x0F of the EEPROM. This word contains bits + * that determine the hardware's default PAUSE (flow control) mode, + * a bit that determines whether the HW defaults to enabling or + * disabling auto-negotiation, and the direction of the + * SW defined pins. If there is no SW over-ride of the flow + * control setting, then the variable hw->fc will + * be initialized based on a value in the EEPROM. + */ + ret_val = hw->nvm.ops.read(hw, NVM_INIT_CONTROL2_REG, 1, &nvm_data); + + if (ret_val) { + DEBUGOUT("NVM Read Error\n"); + return ret_val; + } + + if (!(nvm_data & NVM_WORD0F_PAUSE_MASK)) + hw->fc.requested_mode = e1000_fc_none; + else if ((nvm_data & NVM_WORD0F_PAUSE_MASK) == + NVM_WORD0F_ASM_DIR) + hw->fc.requested_mode = e1000_fc_tx_pause; + else + hw->fc.requested_mode = e1000_fc_full; + + return E1000_SUCCESS; +} + +/** + * e1000_setup_link_generic - Setup flow control and link settings + * @hw: pointer to the HW structure + * + * Determines which flow control settings to use, then configures flow + * control. Calls the appropriate media-specific link configuration + * function. Assuming the adapter has a valid link partner, a valid link + * should be established. Assumes the hardware has previously been reset + * and the transmitter and receiver are not enabled. + **/ +s32 e1000_setup_link_generic(struct e1000_hw *hw) +{ + s32 ret_val; + + DEBUGFUNC("e1000_setup_link_generic"); + + /* In the case of the phy reset being blocked, we already have a link. + * We do not need to set it up again. + */ + if (hw->phy.ops.check_reset_block && hw->phy.ops.check_reset_block(hw)) + return E1000_SUCCESS; + + /* If requested flow control is set to default, set flow control + * based on the EEPROM flow control settings. + */ + if (hw->fc.requested_mode == e1000_fc_default) { + ret_val = e1000_set_default_fc_generic(hw); + if (ret_val) + return ret_val; + } + + /* Save off the requested flow control mode for use later. Depending + * on the link partner's capabilities, we may or may not use this mode. + */ + hw->fc.current_mode = hw->fc.requested_mode; + + DEBUGOUT1("After fix-ups FlowControl is now = %x\n", + hw->fc.current_mode); + + /* Call the necessary media_type subroutine to configure the link. */ + ret_val = hw->mac.ops.setup_physical_interface(hw); + if (ret_val) + return ret_val; + + /* Initialize the flow control address, type, and PAUSE timer + * registers to their default values. This is done even if flow + * control is disabled, because it does not hurt anything to + * initialize these registers. + */ + DEBUGOUT("Initializing the Flow Control address, type and timer regs\n"); + E1000_WRITE_REG(hw, E1000_FCT, FLOW_CONTROL_TYPE); + E1000_WRITE_REG(hw, E1000_FCAH, FLOW_CONTROL_ADDRESS_HIGH); + E1000_WRITE_REG(hw, E1000_FCAL, FLOW_CONTROL_ADDRESS_LOW); + + E1000_WRITE_REG(hw, E1000_FCTTV, hw->fc.pause_time); + + return e1000_set_fc_watermarks_generic(hw); +} + +/** + * e1000_commit_fc_settings_generic - Configure flow control + * @hw: pointer to the HW structure + * + * Write the flow control settings to the Transmit Config Word Register (TXCW) + * base on the flow control settings in e1000_mac_info. + **/ +static s32 e1000_commit_fc_settings_generic(struct e1000_hw *hw) +{ + struct e1000_mac_info *mac = &hw->mac; + u32 txcw; + + DEBUGFUNC("e1000_commit_fc_settings_generic"); + + /* Check for a software override of the flow control settings, and + * setup the device accordingly. If auto-negotiation is enabled, then + * software will have to set the "PAUSE" bits to the correct value in + * the Transmit Config Word Register (TXCW) and re-start auto- + * negotiation. However, if auto-negotiation is disabled, then + * software will have to manually configure the two flow control enable + * bits in the CTRL register. + * + * The possible values of the "fc" parameter are: + * 0: Flow control is completely disabled + * 1: Rx flow control is enabled (we can receive pause frames, + * but not send pause frames). + * 2: Tx flow control is enabled (we can send pause frames but we + * do not support receiving pause frames). + * 3: Both Rx and Tx flow control (symmetric) are enabled. + */ + switch (hw->fc.current_mode) { + case e1000_fc_none: + /* Flow control completely disabled by a software over-ride. */ + txcw = (E1000_TXCW_ANE | E1000_TXCW_FD); + break; + case e1000_fc_rx_pause: + /* Rx Flow control is enabled and Tx Flow control is disabled + * by a software over-ride. Since there really isn't a way to + * advertise that we are capable of Rx Pause ONLY, we will + * advertise that we support both symmetric and asymmetric Rx + * PAUSE. Later, we will disable the adapter's ability to send + * PAUSE frames. + */ + txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_PAUSE_MASK); + break; + case e1000_fc_tx_pause: + /* Tx Flow control is enabled, and Rx Flow control is disabled, + * by a software over-ride. + */ + txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_ASM_DIR); + break; + case e1000_fc_full: + /* Flow control (both Rx and Tx) is enabled by a software + * over-ride. + */ + txcw = (E1000_TXCW_ANE | E1000_TXCW_FD | E1000_TXCW_PAUSE_MASK); + break; + default: + DEBUGOUT("Flow control param set incorrectly\n"); + return -E1000_ERR_CONFIG; + break; + } + + E1000_WRITE_REG(hw, E1000_TXCW, txcw); + mac->txcw = txcw; + + return E1000_SUCCESS; +} + +/** + * e1000_poll_fiber_serdes_link_generic - Poll for link up + * @hw: pointer to the HW structure + * + * Polls for link up by reading the status register, if link fails to come + * up with auto-negotiation, then the link is forced if a signal is detected. + **/ +static s32 e1000_poll_fiber_serdes_link_generic(struct e1000_hw *hw) +{ + struct e1000_mac_info *mac = &hw->mac; + u32 i, status; + s32 ret_val; + + DEBUGFUNC("e1000_poll_fiber_serdes_link_generic"); + + /* If we have a signal (the cable is plugged in, or assumed true for + * serdes media) then poll for a "Link-Up" indication in the Device + * Status Register. Time-out if a link isn't seen in 500 milliseconds + * seconds (Auto-negotiation should complete in less than 500 + * milliseconds even if the other end is doing it in SW). + */ + for (i = 0; i < FIBER_LINK_UP_LIMIT; i++) { + msec_delay(10); + status = E1000_READ_REG(hw, E1000_STATUS); + if (status & E1000_STATUS_LU) + break; + } + if (i == FIBER_LINK_UP_LIMIT) { + DEBUGOUT("Never got a valid link from auto-neg!!!\n"); + mac->autoneg_failed = true; + /* AutoNeg failed to achieve a link, so we'll call + * mac->check_for_link. This routine will force the + * link up if we detect a signal. This will allow us to + * communicate with non-autonegotiating link partners. + */ + ret_val = mac->ops.check_for_link(hw); + if (ret_val) { + DEBUGOUT("Error while checking for link\n"); + return ret_val; + } + mac->autoneg_failed = false; + } else { + mac->autoneg_failed = false; + DEBUGOUT("Valid Link Found\n"); + } + + return E1000_SUCCESS; +} + +/** + * e1000_setup_fiber_serdes_link_generic - Setup link for fiber/serdes + * @hw: pointer to the HW structure + * + * Configures collision distance and flow control for fiber and serdes + * links. Upon successful setup, poll for link. + **/ +s32 e1000_setup_fiber_serdes_link_generic(struct e1000_hw *hw) +{ + u32 ctrl; + s32 ret_val; + + DEBUGFUNC("e1000_setup_fiber_serdes_link_generic"); + + ctrl = E1000_READ_REG(hw, E1000_CTRL); + + /* Take the link out of reset */ + ctrl &= ~E1000_CTRL_LRST; + + hw->mac.ops.config_collision_dist(hw); + + ret_val = e1000_commit_fc_settings_generic(hw); + if (ret_val) + return ret_val; + + /* Since auto-negotiation is enabled, take the link out of reset (the + * link will be in reset, because we previously reset the chip). This + * will restart auto-negotiation. If auto-negotiation is successful + * then the link-up status bit will be set and the flow control enable + * bits (RFCE and TFCE) will be set according to their negotiated value. + */ + DEBUGOUT("Auto-negotiation enabled\n"); + + E1000_WRITE_REG(hw, E1000_CTRL, ctrl); + E1000_WRITE_FLUSH(hw); + msec_delay(1); + + /* For these adapters, the SW definable pin 1 is set when the optics + * detect a signal. If we have a signal, then poll for a "Link-Up" + * indication. + */ + if (hw->phy.media_type == e1000_media_type_internal_serdes || + (E1000_READ_REG(hw, E1000_CTRL) & E1000_CTRL_SWDPIN1)) { + ret_val = e1000_poll_fiber_serdes_link_generic(hw); + } else { + DEBUGOUT("No signal detected\n"); + } + + return ret_val; +} + +/** + * e1000_config_collision_dist_generic - Configure collision distance + * @hw: pointer to the HW structure + * + * Configures the collision distance to the default value and is used + * during link setup. + **/ +static void e1000_config_collision_dist_generic(struct e1000_hw *hw) +{ + u32 tctl; + + DEBUGFUNC("e1000_config_collision_dist_generic"); + + tctl = E1000_READ_REG(hw, E1000_TCTL); + + tctl &= ~E1000_TCTL_COLD; + tctl |= E1000_COLLISION_DISTANCE << E1000_COLD_SHIFT; + + E1000_WRITE_REG(hw, E1000_TCTL, tctl); + E1000_WRITE_FLUSH(hw); +} + +/** + * e1000_set_fc_watermarks_generic - Set flow control high/low watermarks + * @hw: pointer to the HW structure + * + * Sets the flow control high/low threshold (watermark) registers. If + * flow control XON frame transmission is enabled, then set XON frame + * transmission as well. + **/ +s32 e1000_set_fc_watermarks_generic(struct e1000_hw *hw) +{ + u32 fcrtl = 0, fcrth = 0; + + DEBUGFUNC("e1000_set_fc_watermarks_generic"); + + /* Set the flow control receive threshold registers. Normally, + * these registers will be set to a default threshold that may be + * adjusted later by the driver's runtime code. However, if the + * ability to transmit pause frames is not enabled, then these + * registers will be set to 0. + */ + if (hw->fc.current_mode & e1000_fc_tx_pause) { + /* We need to set up the Receive Threshold high and low water + * marks as well as (optionally) enabling the transmission of + * XON frames. + */ + fcrtl = hw->fc.low_water; + if (hw->fc.send_xon) + fcrtl |= E1000_FCRTL_XONE; + + fcrth = hw->fc.high_water; + } + E1000_WRITE_REG(hw, E1000_FCRTL, fcrtl); + E1000_WRITE_REG(hw, E1000_FCRTH, fcrth); + + return E1000_SUCCESS; +} + +/** + * e1000_force_mac_fc_generic - Force the MAC's flow control settings + * @hw: pointer to the HW structure + * + * Force the MAC's flow control settings. Sets the TFCE and RFCE bits in the + * device control register to reflect the adapter settings. TFCE and RFCE + * need to be explicitly set by software when a copper PHY is used because + * autonegotiation is managed by the PHY rather than the MAC. Software must + * also configure these bits when link is forced on a fiber connection. + **/ +s32 e1000_force_mac_fc_generic(struct e1000_hw *hw) +{ + u32 ctrl; + + DEBUGFUNC("e1000_force_mac_fc_generic"); + + ctrl = E1000_READ_REG(hw, E1000_CTRL); + + /* Because we didn't get link via the internal auto-negotiation + * mechanism (we either forced link or we got link via PHY + * auto-neg), we have to manually enable/disable transmit an + * receive flow control. + * + * The "Case" statement below enables/disable flow control + * according to the "hw->fc.current_mode" parameter. + * + * The possible values of the "fc" parameter are: + * 0: Flow control is completely disabled + * 1: Rx flow control is enabled (we can receive pause + * frames but not send pause frames). + * 2: Tx flow control is enabled (we can send pause frames + * frames but we do not receive pause frames). + * 3: Both Rx and Tx flow control (symmetric) is enabled. + * other: No other values should be possible at this point. + */ + DEBUGOUT1("hw->fc.current_mode = %u\n", hw->fc.current_mode); + + switch (hw->fc.current_mode) { + case e1000_fc_none: + ctrl &= (~(E1000_CTRL_TFCE | E1000_CTRL_RFCE)); + break; + case e1000_fc_rx_pause: + ctrl &= (~E1000_CTRL_TFCE); + ctrl |= E1000_CTRL_RFCE; + break; + case e1000_fc_tx_pause: + ctrl &= (~E1000_CTRL_RFCE); + ctrl |= E1000_CTRL_TFCE; + break; + case e1000_fc_full: + ctrl |= (E1000_CTRL_TFCE | E1000_CTRL_RFCE); + break; + default: + DEBUGOUT("Flow control param set incorrectly\n"); + return -E1000_ERR_CONFIG; + } + + E1000_WRITE_REG(hw, E1000_CTRL, ctrl); + + return E1000_SUCCESS; +} + +/** + * e1000_config_fc_after_link_up_generic - Configures flow control after link + * @hw: pointer to the HW structure + * + * Checks the status of auto-negotiation after link up to ensure that the + * speed and duplex were not forced. If the link needed to be forced, then + * flow control needs to be forced also. If auto-negotiation is enabled + * and did not fail, then we configure flow control based on our link + * partner. + **/ +s32 e1000_config_fc_after_link_up_generic(struct e1000_hw *hw) +{ + struct e1000_mac_info *mac = &hw->mac; + s32 ret_val = E1000_SUCCESS; + u32 pcs_status_reg, pcs_adv_reg, pcs_lp_ability_reg, pcs_ctrl_reg; + u16 mii_status_reg, mii_nway_adv_reg, mii_nway_lp_ability_reg; + u16 speed, duplex; + + DEBUGFUNC("e1000_config_fc_after_link_up_generic"); + + /* Check for the case where we have fiber media and auto-neg failed + * so we had to force link. In this case, we need to force the + * configuration of the MAC to match the "fc" parameter. + */ + if (mac->autoneg_failed) { + if (hw->phy.media_type == e1000_media_type_fiber || + hw->phy.media_type == e1000_media_type_internal_serdes) + ret_val = e1000_force_mac_fc_generic(hw); + } else { + if (hw->phy.media_type == e1000_media_type_copper) + ret_val = e1000_force_mac_fc_generic(hw); + } + + if (ret_val) { + DEBUGOUT("Error forcing flow control settings\n"); + return ret_val; + } + + /* Check for the case where we have copper media and auto-neg is + * enabled. In this case, we need to check and see if Auto-Neg + * has completed, and if so, how the PHY and link partner has + * flow control configured. + */ + if ((hw->phy.media_type == e1000_media_type_copper) && mac->autoneg) { + /* Read the MII Status Register and check to see if AutoNeg + * has completed. We read this twice because this reg has + * some "sticky" (latched) bits. + */ + ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &mii_status_reg); + if (ret_val) + return ret_val; + ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &mii_status_reg); + if (ret_val) + return ret_val; + + if (!(mii_status_reg & MII_SR_AUTONEG_COMPLETE)) { + DEBUGOUT("Copper PHY and Auto Neg has not completed.\n"); + return ret_val; + } + + /* The AutoNeg process has completed, so we now need to + * read both the Auto Negotiation Advertisement + * Register (Address 4) and the Auto_Negotiation Base + * Page Ability Register (Address 5) to determine how + * flow control was negotiated. + */ + ret_val = hw->phy.ops.read_reg(hw, PHY_AUTONEG_ADV, + &mii_nway_adv_reg); + if (ret_val) + return ret_val; + ret_val = hw->phy.ops.read_reg(hw, PHY_LP_ABILITY, + &mii_nway_lp_ability_reg); + if (ret_val) + return ret_val; + + /* Two bits in the Auto Negotiation Advertisement Register + * (Address 4) and two bits in the Auto Negotiation Base + * Page Ability Register (Address 5) determine flow control + * for both the PHY and the link partner. The following + * table, taken out of the IEEE 802.3ab/D6.0 dated March 25, + * 1999, describes these PAUSE resolution bits and how flow + * control is determined based upon these settings. + * NOTE: DC = Don't Care + * + * LOCAL DEVICE | LINK PARTNER + * PAUSE | ASM_DIR | PAUSE | ASM_DIR | NIC Resolution + *-------|---------|-------|---------|-------------------- + * 0 | 0 | DC | DC | e1000_fc_none + * 0 | 1 | 0 | DC | e1000_fc_none + * 0 | 1 | 1 | 0 | e1000_fc_none + * 0 | 1 | 1 | 1 | e1000_fc_tx_pause + * 1 | 0 | 0 | DC | e1000_fc_none + * 1 | DC | 1 | DC | e1000_fc_full + * 1 | 1 | 0 | 0 | e1000_fc_none + * 1 | 1 | 0 | 1 | e1000_fc_rx_pause + * + * Are both PAUSE bits set to 1? If so, this implies + * Symmetric Flow Control is enabled at both ends. The + * ASM_DIR bits are irrelevant per the spec. + * + * For Symmetric Flow Control: + * + * LOCAL DEVICE | LINK PARTNER + * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result + *-------|---------|-------|---------|-------------------- + * 1 | DC | 1 | DC | E1000_fc_full + * + */ + if ((mii_nway_adv_reg & NWAY_AR_PAUSE) && + (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE)) { + /* Now we need to check if the user selected Rx ONLY + * of pause frames. In this case, we had to advertise + * FULL flow control because we could not advertise Rx + * ONLY. Hence, we must now check to see if we need to + * turn OFF the TRANSMISSION of PAUSE frames. + */ + if (hw->fc.requested_mode == e1000_fc_full) { + hw->fc.current_mode = e1000_fc_full; + DEBUGOUT("Flow Control = FULL.\n"); + } else { + hw->fc.current_mode = e1000_fc_rx_pause; + DEBUGOUT("Flow Control = Rx PAUSE frames only.\n"); + } + } + /* For receiving PAUSE frames ONLY. + * + * LOCAL DEVICE | LINK PARTNER + * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result + *-------|---------|-------|---------|-------------------- + * 0 | 1 | 1 | 1 | e1000_fc_tx_pause + */ + else if (!(mii_nway_adv_reg & NWAY_AR_PAUSE) && + (mii_nway_adv_reg & NWAY_AR_ASM_DIR) && + (mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) && + (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) { + hw->fc.current_mode = e1000_fc_tx_pause; + DEBUGOUT("Flow Control = Tx PAUSE frames only.\n"); + } + /* For transmitting PAUSE frames ONLY. + * + * LOCAL DEVICE | LINK PARTNER + * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result + *-------|---------|-------|---------|-------------------- + * 1 | 1 | 0 | 1 | e1000_fc_rx_pause + */ + else if ((mii_nway_adv_reg & NWAY_AR_PAUSE) && + (mii_nway_adv_reg & NWAY_AR_ASM_DIR) && + !(mii_nway_lp_ability_reg & NWAY_LPAR_PAUSE) && + (mii_nway_lp_ability_reg & NWAY_LPAR_ASM_DIR)) { + hw->fc.current_mode = e1000_fc_rx_pause; + DEBUGOUT("Flow Control = Rx PAUSE frames only.\n"); + } else { + /* Per the IEEE spec, at this point flow control + * should be disabled. + */ + hw->fc.current_mode = e1000_fc_none; + DEBUGOUT("Flow Control = NONE.\n"); + } + + /* Now we need to do one last check... If we auto- + * negotiated to HALF DUPLEX, flow control should not be + * enabled per IEEE 802.3 spec. + */ + ret_val = mac->ops.get_link_up_info(hw, &speed, &duplex); + if (ret_val) { + DEBUGOUT("Error getting link speed and duplex\n"); + return ret_val; + } + + if (duplex == HALF_DUPLEX) + hw->fc.current_mode = e1000_fc_none; + + /* Now we call a subroutine to actually force the MAC + * controller to use the correct flow control settings. + */ + ret_val = e1000_force_mac_fc_generic(hw); + if (ret_val) { + DEBUGOUT("Error forcing flow control settings\n"); + return ret_val; + } + } + + /* Check for the case where we have SerDes media and auto-neg is + * enabled. In this case, we need to check and see if Auto-Neg + * has completed, and if so, how the PHY and link partner has + * flow control configured. + */ + if ((hw->phy.media_type == e1000_media_type_internal_serdes) && + mac->autoneg) { + /* Read the PCS_LSTS and check to see if AutoNeg + * has completed. + */ + pcs_status_reg = E1000_READ_REG(hw, E1000_PCS_LSTAT); + + if (!(pcs_status_reg & E1000_PCS_LSTS_AN_COMPLETE)) { + DEBUGOUT("PCS Auto Neg has not completed.\n"); + return ret_val; + } + + /* The AutoNeg process has completed, so we now need to + * read both the Auto Negotiation Advertisement + * Register (PCS_ANADV) and the Auto_Negotiation Base + * Page Ability Register (PCS_LPAB) to determine how + * flow control was negotiated. + */ + pcs_adv_reg = E1000_READ_REG(hw, E1000_PCS_ANADV); + pcs_lp_ability_reg = E1000_READ_REG(hw, E1000_PCS_LPAB); + + /* Two bits in the Auto Negotiation Advertisement Register + * (PCS_ANADV) and two bits in the Auto Negotiation Base + * Page Ability Register (PCS_LPAB) determine flow control + * for both the PHY and the link partner. The following + * table, taken out of the IEEE 802.3ab/D6.0 dated March 25, + * 1999, describes these PAUSE resolution bits and how flow + * control is determined based upon these settings. + * NOTE: DC = Don't Care + * + * LOCAL DEVICE | LINK PARTNER + * PAUSE | ASM_DIR | PAUSE | ASM_DIR | NIC Resolution + *-------|---------|-------|---------|-------------------- + * 0 | 0 | DC | DC | e1000_fc_none + * 0 | 1 | 0 | DC | e1000_fc_none + * 0 | 1 | 1 | 0 | e1000_fc_none + * 0 | 1 | 1 | 1 | e1000_fc_tx_pause + * 1 | 0 | 0 | DC | e1000_fc_none + * 1 | DC | 1 | DC | e1000_fc_full + * 1 | 1 | 0 | 0 | e1000_fc_none + * 1 | 1 | 0 | 1 | e1000_fc_rx_pause + * + * Are both PAUSE bits set to 1? If so, this implies + * Symmetric Flow Control is enabled at both ends. The + * ASM_DIR bits are irrelevant per the spec. + * + * For Symmetric Flow Control: + * + * LOCAL DEVICE | LINK PARTNER + * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result + *-------|---------|-------|---------|-------------------- + * 1 | DC | 1 | DC | e1000_fc_full + * + */ + if ((pcs_adv_reg & E1000_TXCW_PAUSE) && + (pcs_lp_ability_reg & E1000_TXCW_PAUSE)) { + /* Now we need to check if the user selected Rx ONLY + * of pause frames. In this case, we had to advertise + * FULL flow control because we could not advertise Rx + * ONLY. Hence, we must now check to see if we need to + * turn OFF the TRANSMISSION of PAUSE frames. + */ + if (hw->fc.requested_mode == e1000_fc_full) { + hw->fc.current_mode = e1000_fc_full; + DEBUGOUT("Flow Control = FULL.\n"); + } else { + hw->fc.current_mode = e1000_fc_rx_pause; + DEBUGOUT("Flow Control = Rx PAUSE frames only.\n"); + } + } + /* For receiving PAUSE frames ONLY. + * + * LOCAL DEVICE | LINK PARTNER + * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result + *-------|---------|-------|---------|-------------------- + * 0 | 1 | 1 | 1 | e1000_fc_tx_pause + */ + else if (!(pcs_adv_reg & E1000_TXCW_PAUSE) && + (pcs_adv_reg & E1000_TXCW_ASM_DIR) && + (pcs_lp_ability_reg & E1000_TXCW_PAUSE) && + (pcs_lp_ability_reg & E1000_TXCW_ASM_DIR)) { + hw->fc.current_mode = e1000_fc_tx_pause; + DEBUGOUT("Flow Control = Tx PAUSE frames only.\n"); + } + /* For transmitting PAUSE frames ONLY. + * + * LOCAL DEVICE | LINK PARTNER + * PAUSE | ASM_DIR | PAUSE | ASM_DIR | Result + *-------|---------|-------|---------|-------------------- + * 1 | 1 | 0 | 1 | e1000_fc_rx_pause + */ + else if ((pcs_adv_reg & E1000_TXCW_PAUSE) && + (pcs_adv_reg & E1000_TXCW_ASM_DIR) && + !(pcs_lp_ability_reg & E1000_TXCW_PAUSE) && + (pcs_lp_ability_reg & E1000_TXCW_ASM_DIR)) { + hw->fc.current_mode = e1000_fc_rx_pause; + DEBUGOUT("Flow Control = Rx PAUSE frames only.\n"); + } else { + /* Per the IEEE spec, at this point flow control + * should be disabled. + */ + hw->fc.current_mode = e1000_fc_none; + DEBUGOUT("Flow Control = NONE.\n"); + } + + /* Now we call a subroutine to actually force the MAC + * controller to use the correct flow control settings. + */ + pcs_ctrl_reg = E1000_READ_REG(hw, E1000_PCS_LCTL); + pcs_ctrl_reg |= E1000_PCS_LCTL_FORCE_FCTRL; + E1000_WRITE_REG(hw, E1000_PCS_LCTL, pcs_ctrl_reg); + + ret_val = e1000_force_mac_fc_generic(hw); + if (ret_val) { + DEBUGOUT("Error forcing flow control settings\n"); + return ret_val; + } + } + + return E1000_SUCCESS; +} + +/** + * e1000_get_speed_and_duplex_copper_generic - Retrieve current speed/duplex + * @hw: pointer to the HW structure + * @speed: stores the current speed + * @duplex: stores the current duplex + * + * Read the status register for the current speed/duplex and store the current + * speed and duplex for copper connections. + **/ +s32 e1000_get_speed_and_duplex_copper_generic(struct e1000_hw *hw, u16 *speed, + u16 *duplex) +{ + u32 status; + + DEBUGFUNC("e1000_get_speed_and_duplex_copper_generic"); + + status = E1000_READ_REG(hw, E1000_STATUS); + if (status & E1000_STATUS_SPEED_1000) { + *speed = SPEED_1000; + DEBUGOUT("1000 Mbs, "); + } else if (status & E1000_STATUS_SPEED_100) { + *speed = SPEED_100; + DEBUGOUT("100 Mbs, "); + } else { + *speed = SPEED_10; + DEBUGOUT("10 Mbs, "); + } + + if (status & E1000_STATUS_FD) { + *duplex = FULL_DUPLEX; + DEBUGOUT("Full Duplex\n"); + } else { + *duplex = HALF_DUPLEX; + DEBUGOUT("Half Duplex\n"); + } + + return E1000_SUCCESS; +} + +/** + * e1000_get_speed_and_duplex_fiber_generic - Retrieve current speed/duplex + * @hw: pointer to the HW structure + * @speed: stores the current speed + * @duplex: stores the current duplex + * + * Sets the speed and duplex to gigabit full duplex (the only possible option) + * for fiber/serdes links. + **/ +s32 e1000_get_speed_and_duplex_fiber_serdes_generic(struct e1000_hw E1000_UNUSEDARG *hw, + u16 *speed, u16 *duplex) +{ + DEBUGFUNC("e1000_get_speed_and_duplex_fiber_serdes_generic"); + + *speed = SPEED_1000; + *duplex = FULL_DUPLEX; + + return E1000_SUCCESS; +} + +/** + * e1000_get_hw_semaphore_generic - Acquire hardware semaphore + * @hw: pointer to the HW structure + * + * Acquire the HW semaphore to access the PHY or NVM + **/ +s32 e1000_get_hw_semaphore_generic(struct e1000_hw *hw) +{ + u32 swsm; + s32 timeout = hw->nvm.word_size + 1; + s32 i = 0; + + DEBUGFUNC("e1000_get_hw_semaphore_generic"); + + /* Get the SW semaphore */ + while (i < timeout) { + swsm = E1000_READ_REG(hw, E1000_SWSM); + if (!(swsm & E1000_SWSM_SMBI)) + break; + + usec_delay(50); + i++; + } + + if (i == timeout) { + DEBUGOUT("Driver can't access device - SMBI bit is set.\n"); + return -E1000_ERR_NVM; + } + + /* Get the FW semaphore. */ + for (i = 0; i < timeout; i++) { + swsm = E1000_READ_REG(hw, E1000_SWSM); + E1000_WRITE_REG(hw, E1000_SWSM, swsm | E1000_SWSM_SWESMBI); + + /* Semaphore acquired if bit latched */ + if (E1000_READ_REG(hw, E1000_SWSM) & E1000_SWSM_SWESMBI) + break; + + usec_delay(50); + } + + if (i == timeout) { + /* Release semaphores */ + e1000_put_hw_semaphore_generic(hw); + DEBUGOUT("Driver can't access the NVM\n"); + return -E1000_ERR_NVM; + } + + return E1000_SUCCESS; +} + +/** + * e1000_put_hw_semaphore_generic - Release hardware semaphore + * @hw: pointer to the HW structure + * + * Release hardware semaphore used to access the PHY or NVM + **/ +void e1000_put_hw_semaphore_generic(struct e1000_hw *hw) +{ + u32 swsm; + + DEBUGFUNC("e1000_put_hw_semaphore_generic"); + + swsm = E1000_READ_REG(hw, E1000_SWSM); + + swsm &= ~(E1000_SWSM_SMBI | E1000_SWSM_SWESMBI); + + E1000_WRITE_REG(hw, E1000_SWSM, swsm); +} + +/** + * e1000_get_auto_rd_done_generic - Check for auto read completion + * @hw: pointer to the HW structure + * + * Check EEPROM for Auto Read done bit. + **/ +s32 e1000_get_auto_rd_done_generic(struct e1000_hw *hw) +{ + s32 i = 0; + + DEBUGFUNC("e1000_get_auto_rd_done_generic"); + + while (i < AUTO_READ_DONE_TIMEOUT) { + if (E1000_READ_REG(hw, E1000_EECD) & E1000_EECD_AUTO_RD) + break; + msec_delay(1); + i++; + } + + if (i == AUTO_READ_DONE_TIMEOUT) { + DEBUGOUT("Auto read by HW from NVM has not completed.\n"); + return -E1000_ERR_RESET; + } + + return E1000_SUCCESS; +} + +/** + * e1000_valid_led_default_generic - Verify a valid default LED config + * @hw: pointer to the HW structure + * @data: pointer to the NVM (EEPROM) + * + * Read the EEPROM for the current default LED configuration. If the + * LED configuration is not valid, set to a valid LED configuration. + **/ +s32 e1000_valid_led_default_generic(struct e1000_hw *hw, u16 *data) +{ + s32 ret_val; + + DEBUGFUNC("e1000_valid_led_default_generic"); + + ret_val = hw->nvm.ops.read(hw, NVM_ID_LED_SETTINGS, 1, data); + if (ret_val) { + DEBUGOUT("NVM Read Error\n"); + return ret_val; + } + + if (*data == ID_LED_RESERVED_0000 || *data == ID_LED_RESERVED_FFFF) + *data = ID_LED_DEFAULT; + + return E1000_SUCCESS; +} + +/** + * e1000_id_led_init_generic - + * @hw: pointer to the HW structure + * + **/ +s32 e1000_id_led_init_generic(struct e1000_hw *hw) +{ + struct e1000_mac_info *mac = &hw->mac; + s32 ret_val; + const u32 ledctl_mask = 0x000000FF; + const u32 ledctl_on = E1000_LEDCTL_MODE_LED_ON; + const u32 ledctl_off = E1000_LEDCTL_MODE_LED_OFF; + u16 data, i, temp; + const u16 led_mask = 0x0F; + + DEBUGFUNC("e1000_id_led_init_generic"); + + ret_val = hw->nvm.ops.valid_led_default(hw, &data); + if (ret_val) + return ret_val; + + mac->ledctl_default = E1000_READ_REG(hw, E1000_LEDCTL); + mac->ledctl_mode1 = mac->ledctl_default; + mac->ledctl_mode2 = mac->ledctl_default; + + for (i = 0; i < 4; i++) { + temp = (data >> (i << 2)) & led_mask; + switch (temp) { + case ID_LED_ON1_DEF2: + case ID_LED_ON1_ON2: + case ID_LED_ON1_OFF2: + mac->ledctl_mode1 &= ~(ledctl_mask << (i << 3)); + mac->ledctl_mode1 |= ledctl_on << (i << 3); + break; + case ID_LED_OFF1_DEF2: + case ID_LED_OFF1_ON2: + case ID_LED_OFF1_OFF2: + mac->ledctl_mode1 &= ~(ledctl_mask << (i << 3)); + mac->ledctl_mode1 |= ledctl_off << (i << 3); + break; + default: + /* Do nothing */ + break; + } + switch (temp) { + case ID_LED_DEF1_ON2: + case ID_LED_ON1_ON2: + case ID_LED_OFF1_ON2: + mac->ledctl_mode2 &= ~(ledctl_mask << (i << 3)); + mac->ledctl_mode2 |= ledctl_on << (i << 3); + break; + case ID_LED_DEF1_OFF2: + case ID_LED_ON1_OFF2: + case ID_LED_OFF1_OFF2: + mac->ledctl_mode2 &= ~(ledctl_mask << (i << 3)); + mac->ledctl_mode2 |= ledctl_off << (i << 3); + break; + default: + /* Do nothing */ + break; + } + } + + return E1000_SUCCESS; +} + +/** + * e1000_setup_led_generic - Configures SW controllable LED + * @hw: pointer to the HW structure + * + * This prepares the SW controllable LED for use and saves the current state + * of the LED so it can be later restored. + **/ +s32 e1000_setup_led_generic(struct e1000_hw *hw) +{ + u32 ledctl; + + DEBUGFUNC("e1000_setup_led_generic"); + + if (hw->mac.ops.setup_led != e1000_setup_led_generic) + return -E1000_ERR_CONFIG; + + if (hw->phy.media_type == e1000_media_type_fiber) { + ledctl = E1000_READ_REG(hw, E1000_LEDCTL); + hw->mac.ledctl_default = ledctl; + /* Turn off LED0 */ + ledctl &= ~(E1000_LEDCTL_LED0_IVRT | E1000_LEDCTL_LED0_BLINK | + E1000_LEDCTL_LED0_MODE_MASK); + ledctl |= (E1000_LEDCTL_MODE_LED_OFF << + E1000_LEDCTL_LED0_MODE_SHIFT); + E1000_WRITE_REG(hw, E1000_LEDCTL, ledctl); + } else if (hw->phy.media_type == e1000_media_type_copper) { + E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_mode1); + } + + return E1000_SUCCESS; +} + +/** + * e1000_cleanup_led_generic - Set LED config to default operation + * @hw: pointer to the HW structure + * + * Remove the current LED configuration and set the LED configuration + * to the default value, saved from the EEPROM. + **/ +s32 e1000_cleanup_led_generic(struct e1000_hw *hw) +{ + DEBUGFUNC("e1000_cleanup_led_generic"); + + E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_default); + return E1000_SUCCESS; +} + +/** + * e1000_blink_led_generic - Blink LED + * @hw: pointer to the HW structure + * + * Blink the LEDs which are set to be on. + **/ +s32 e1000_blink_led_generic(struct e1000_hw *hw) +{ + u32 ledctl_blink = 0; + u32 i; + + DEBUGFUNC("e1000_blink_led_generic"); + + if (hw->phy.media_type == e1000_media_type_fiber) { + /* always blink LED0 for PCI-E fiber */ + ledctl_blink = E1000_LEDCTL_LED0_BLINK | + (E1000_LEDCTL_MODE_LED_ON << E1000_LEDCTL_LED0_MODE_SHIFT); + } else { + /* Set the blink bit for each LED that's "on" (0x0E) + * (or "off" if inverted) in ledctl_mode2. The blink + * logic in hardware only works when mode is set to "on" + * so it must be changed accordingly when the mode is + * "off" and inverted. + */ + ledctl_blink = hw->mac.ledctl_mode2; + for (i = 0; i < 32; i += 8) { + u32 mode = (hw->mac.ledctl_mode2 >> i) & + E1000_LEDCTL_LED0_MODE_MASK; + u32 led_default = hw->mac.ledctl_default >> i; + + if ((!(led_default & E1000_LEDCTL_LED0_IVRT) && + (mode == E1000_LEDCTL_MODE_LED_ON)) || + ((led_default & E1000_LEDCTL_LED0_IVRT) && + (mode == E1000_LEDCTL_MODE_LED_OFF))) { + ledctl_blink &= + ~(E1000_LEDCTL_LED0_MODE_MASK << i); + ledctl_blink |= (E1000_LEDCTL_LED0_BLINK | + E1000_LEDCTL_MODE_LED_ON) << i; + } + } + } + + E1000_WRITE_REG(hw, E1000_LEDCTL, ledctl_blink); + + return E1000_SUCCESS; +} + +/** + * e1000_led_on_generic - Turn LED on + * @hw: pointer to the HW structure + * + * Turn LED on. + **/ +s32 e1000_led_on_generic(struct e1000_hw *hw) +{ + u32 ctrl; + + DEBUGFUNC("e1000_led_on_generic"); + + switch (hw->phy.media_type) { + case e1000_media_type_fiber: + ctrl = E1000_READ_REG(hw, E1000_CTRL); + ctrl &= ~E1000_CTRL_SWDPIN0; + ctrl |= E1000_CTRL_SWDPIO0; + E1000_WRITE_REG(hw, E1000_CTRL, ctrl); + break; + case e1000_media_type_copper: + E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_mode2); + break; + default: + break; + } + + return E1000_SUCCESS; +} + +/** + * e1000_led_off_generic - Turn LED off + * @hw: pointer to the HW structure + * + * Turn LED off. + **/ +s32 e1000_led_off_generic(struct e1000_hw *hw) +{ + u32 ctrl; + + DEBUGFUNC("e1000_led_off_generic"); + + switch (hw->phy.media_type) { + case e1000_media_type_fiber: + ctrl = E1000_READ_REG(hw, E1000_CTRL); + ctrl |= E1000_CTRL_SWDPIN0; + ctrl |= E1000_CTRL_SWDPIO0; + E1000_WRITE_REG(hw, E1000_CTRL, ctrl); + break; + case e1000_media_type_copper: + E1000_WRITE_REG(hw, E1000_LEDCTL, hw->mac.ledctl_mode1); + break; + default: + break; + } + + return E1000_SUCCESS; +} + +/** + * e1000_set_pcie_no_snoop_generic - Set PCI-express capabilities + * @hw: pointer to the HW structure + * @no_snoop: bitmap of snoop events + * + * Set the PCI-express register to snoop for events enabled in 'no_snoop'. + **/ +void e1000_set_pcie_no_snoop_generic(struct e1000_hw *hw, u32 no_snoop) +{ + u32 gcr; + + DEBUGFUNC("e1000_set_pcie_no_snoop_generic"); + + if (no_snoop) { + gcr = E1000_READ_REG(hw, E1000_GCR); + gcr &= ~(PCIE_NO_SNOOP_ALL); + gcr |= no_snoop; + E1000_WRITE_REG(hw, E1000_GCR, gcr); + } +} + +/** + * e1000_disable_pcie_master_generic - Disables PCI-express master access + * @hw: pointer to the HW structure + * + * Returns E1000_SUCCESS if successful, else returns -10 + * (-E1000_ERR_MASTER_REQUESTS_PENDING) if master disable bit has not caused + * the master requests to be disabled. + * + * Disables PCI-Express master access and verifies there are no pending + * requests. + **/ +s32 e1000_disable_pcie_master_generic(struct e1000_hw *hw) +{ + u32 ctrl; + s32 timeout = MASTER_DISABLE_TIMEOUT; + + DEBUGFUNC("e1000_disable_pcie_master_generic"); + + ctrl = E1000_READ_REG(hw, E1000_CTRL); + ctrl |= E1000_CTRL_GIO_MASTER_DISABLE; + E1000_WRITE_REG(hw, E1000_CTRL, ctrl); + + while (timeout) { + if (!(E1000_READ_REG(hw, E1000_STATUS) & + E1000_STATUS_GIO_MASTER_ENABLE)) + break; + usec_delay(100); + timeout--; + } + + if (!timeout) { + DEBUGOUT("Master requests are pending.\n"); + return -E1000_ERR_MASTER_REQUESTS_PENDING; + } + + return E1000_SUCCESS; +} + +/** + * e1000_reset_adaptive_generic - Reset Adaptive Interframe Spacing + * @hw: pointer to the HW structure + * + * Reset the Adaptive Interframe Spacing throttle to default values. + **/ +void e1000_reset_adaptive_generic(struct e1000_hw *hw) +{ + struct e1000_mac_info *mac = &hw->mac; + + DEBUGFUNC("e1000_reset_adaptive_generic"); + + if (!mac->adaptive_ifs) { + DEBUGOUT("Not in Adaptive IFS mode!\n"); + return; + } + + mac->current_ifs_val = 0; + mac->ifs_min_val = IFS_MIN; + mac->ifs_max_val = IFS_MAX; + mac->ifs_step_size = IFS_STEP; + mac->ifs_ratio = IFS_RATIO; + + mac->in_ifs_mode = false; + E1000_WRITE_REG(hw, E1000_AIT, 0); +} + +/** + * e1000_update_adaptive_generic - Update Adaptive Interframe Spacing + * @hw: pointer to the HW structure + * + * Update the Adaptive Interframe Spacing Throttle value based on the + * time between transmitted packets and time between collisions. + **/ +void e1000_update_adaptive_generic(struct e1000_hw *hw) +{ + struct e1000_mac_info *mac = &hw->mac; + + DEBUGFUNC("e1000_update_adaptive_generic"); + + if (!mac->adaptive_ifs) { + DEBUGOUT("Not in Adaptive IFS mode!\n"); + return; + } + + if ((mac->collision_delta * mac->ifs_ratio) > mac->tx_packet_delta) { + if (mac->tx_packet_delta > MIN_NUM_XMITS) { + mac->in_ifs_mode = true; + if (mac->current_ifs_val < mac->ifs_max_val) { + if (!mac->current_ifs_val) + mac->current_ifs_val = mac->ifs_min_val; + else + mac->current_ifs_val += + mac->ifs_step_size; + E1000_WRITE_REG(hw, E1000_AIT, + mac->current_ifs_val); + } + } + } else { + if (mac->in_ifs_mode && + (mac->tx_packet_delta <= MIN_NUM_XMITS)) { + mac->current_ifs_val = 0; + mac->in_ifs_mode = false; + E1000_WRITE_REG(hw, E1000_AIT, 0); + } + } +} + +/** + * e1000_validate_mdi_setting_generic - Verify MDI/MDIx settings + * @hw: pointer to the HW structure + * + * Verify that when not using auto-negotiation that MDI/MDIx is correctly + * set, which is forced to MDI mode only. + **/ +static s32 e1000_validate_mdi_setting_generic(struct e1000_hw *hw) +{ + DEBUGFUNC("e1000_validate_mdi_setting_generic"); + + if (!hw->mac.autoneg && (hw->phy.mdix == 0 || hw->phy.mdix == 3)) { + DEBUGOUT("Invalid MDI setting detected\n"); + hw->phy.mdix = 1; + return -E1000_ERR_CONFIG; + } + + return E1000_SUCCESS; +} + +/** + * e1000_validate_mdi_setting_crossover_generic - Verify MDI/MDIx settings + * @hw: pointer to the HW structure + * + * Validate the MDI/MDIx setting, allowing for auto-crossover during forced + * operation. + **/ +s32 e1000_validate_mdi_setting_crossover_generic(struct e1000_hw E1000_UNUSEDARG *hw) +{ + DEBUGFUNC("e1000_validate_mdi_setting_crossover_generic"); + + return E1000_SUCCESS; +} + +/** + * e1000_write_8bit_ctrl_reg_generic - Write a 8bit CTRL register + * @hw: pointer to the HW structure + * @reg: 32bit register offset such as E1000_SCTL + * @offset: register offset to write to + * @data: data to write at register offset + * + * Writes an address/data control type register. There are several of these + * and they all have the format address << 8 | data and bit 31 is polled for + * completion. + **/ +s32 e1000_write_8bit_ctrl_reg_generic(struct e1000_hw *hw, u32 reg, + u32 offset, u8 data) +{ + u32 i, regvalue = 0; + + DEBUGFUNC("e1000_write_8bit_ctrl_reg_generic"); + + /* Set up the address and data */ + regvalue = ((u32)data) | (offset << E1000_GEN_CTL_ADDRESS_SHIFT); + E1000_WRITE_REG(hw, reg, regvalue); + + /* Poll the ready bit to see if the MDI read completed */ + for (i = 0; i < E1000_GEN_POLL_TIMEOUT; i++) { + usec_delay(5); + regvalue = E1000_READ_REG(hw, reg); + if (regvalue & E1000_GEN_CTL_READY) + break; + } + if (!(regvalue & E1000_GEN_CTL_READY)) { + DEBUGOUT1("Reg %08x did not indicate ready\n", reg); + return -E1000_ERR_PHY; + } + + return E1000_SUCCESS; +} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.h new file mode 100644 index 00000000..6a1b0f52 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mac.h @@ -0,0 +1,80 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _E1000_MAC_H_ +#define _E1000_MAC_H_ + +void e1000_init_mac_ops_generic(struct e1000_hw *hw); +void e1000_null_mac_generic(struct e1000_hw *hw); +s32 e1000_null_ops_generic(struct e1000_hw *hw); +s32 e1000_null_link_info(struct e1000_hw *hw, u16 *s, u16 *d); +bool e1000_null_mng_mode(struct e1000_hw *hw); +void e1000_null_update_mc(struct e1000_hw *hw, u8 *h, u32 a); +void e1000_null_write_vfta(struct e1000_hw *hw, u32 a, u32 b); +void e1000_null_rar_set(struct e1000_hw *hw, u8 *h, u32 a); +s32 e1000_blink_led_generic(struct e1000_hw *hw); +s32 e1000_check_for_copper_link_generic(struct e1000_hw *hw); +s32 e1000_check_for_fiber_link_generic(struct e1000_hw *hw); +s32 e1000_check_for_serdes_link_generic(struct e1000_hw *hw); +s32 e1000_cleanup_led_generic(struct e1000_hw *hw); +s32 e1000_config_fc_after_link_up_generic(struct e1000_hw *hw); +s32 e1000_disable_pcie_master_generic(struct e1000_hw *hw); +s32 e1000_force_mac_fc_generic(struct e1000_hw *hw); +s32 e1000_get_auto_rd_done_generic(struct e1000_hw *hw); +s32 e1000_get_bus_info_pcie_generic(struct e1000_hw *hw); +void e1000_set_lan_id_single_port(struct e1000_hw *hw); +s32 e1000_get_hw_semaphore_generic(struct e1000_hw *hw); +s32 e1000_get_speed_and_duplex_copper_generic(struct e1000_hw *hw, u16 *speed, + u16 *duplex); +s32 e1000_get_speed_and_duplex_fiber_serdes_generic(struct e1000_hw *hw, + u16 *speed, u16 *duplex); +s32 e1000_id_led_init_generic(struct e1000_hw *hw); +s32 e1000_led_on_generic(struct e1000_hw *hw); +s32 e1000_led_off_generic(struct e1000_hw *hw); +void e1000_update_mc_addr_list_generic(struct e1000_hw *hw, + u8 *mc_addr_list, u32 mc_addr_count); +s32 e1000_set_fc_watermarks_generic(struct e1000_hw *hw); +s32 e1000_setup_fiber_serdes_link_generic(struct e1000_hw *hw); +s32 e1000_setup_led_generic(struct e1000_hw *hw); +s32 e1000_setup_link_generic(struct e1000_hw *hw); +s32 e1000_validate_mdi_setting_crossover_generic(struct e1000_hw *hw); +s32 e1000_write_8bit_ctrl_reg_generic(struct e1000_hw *hw, u32 reg, + u32 offset, u8 data); + +u32 e1000_hash_mc_addr_generic(struct e1000_hw *hw, u8 *mc_addr); + +void e1000_clear_hw_cntrs_base_generic(struct e1000_hw *hw); +void e1000_clear_vfta_generic(struct e1000_hw *hw); +void e1000_init_rx_addrs_generic(struct e1000_hw *hw, u16 rar_count); +void e1000_put_hw_semaphore_generic(struct e1000_hw *hw); +s32 e1000_check_alt_mac_addr_generic(struct e1000_hw *hw); +void e1000_reset_adaptive_generic(struct e1000_hw *hw); +void e1000_set_pcie_no_snoop_generic(struct e1000_hw *hw, u32 no_snoop); +void e1000_update_adaptive_generic(struct e1000_hw *hw); +void e1000_write_vfta_generic(struct e1000_hw *hw, u32 offset, u32 value); + +#endif diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.c new file mode 100644 index 00000000..a1700398 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.c @@ -0,0 +1,554 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#include "e1000_api.h" + +/** + * e1000_calculate_checksum - Calculate checksum for buffer + * @buffer: pointer to EEPROM + * @length: size of EEPROM to calculate a checksum for + * + * Calculates the checksum for some buffer on a specified length. The + * checksum calculated is returned. + **/ +u8 e1000_calculate_checksum(u8 *buffer, u32 length) +{ + u32 i; + u8 sum = 0; + + DEBUGFUNC("e1000_calculate_checksum"); + + if (!buffer) + return 0; + + for (i = 0; i < length; i++) + sum += buffer[i]; + + return (u8) (0 - sum); +} + +/** + * e1000_mng_enable_host_if_generic - Checks host interface is enabled + * @hw: pointer to the HW structure + * + * Returns E1000_success upon success, else E1000_ERR_HOST_INTERFACE_COMMAND + * + * This function checks whether the HOST IF is enabled for command operation + * and also checks whether the previous command is completed. It busy waits + * in case of previous command is not completed. + **/ +s32 e1000_mng_enable_host_if_generic(struct e1000_hw *hw) +{ + u32 hicr; + u8 i; + + DEBUGFUNC("e1000_mng_enable_host_if_generic"); + + if (!hw->mac.arc_subsystem_valid) { + DEBUGOUT("ARC subsystem not valid.\n"); + return -E1000_ERR_HOST_INTERFACE_COMMAND; + } + + /* Check that the host interface is enabled. */ + hicr = E1000_READ_REG(hw, E1000_HICR); + if (!(hicr & E1000_HICR_EN)) { + DEBUGOUT("E1000_HOST_EN bit disabled.\n"); + return -E1000_ERR_HOST_INTERFACE_COMMAND; + } + /* check the previous command is completed */ + for (i = 0; i < E1000_MNG_DHCP_COMMAND_TIMEOUT; i++) { + hicr = E1000_READ_REG(hw, E1000_HICR); + if (!(hicr & E1000_HICR_C)) + break; + msec_delay_irq(1); + } + + if (i == E1000_MNG_DHCP_COMMAND_TIMEOUT) { + DEBUGOUT("Previous command timeout failed .\n"); + return -E1000_ERR_HOST_INTERFACE_COMMAND; + } + + return E1000_SUCCESS; +} + +/** + * e1000_check_mng_mode_generic - Generic check management mode + * @hw: pointer to the HW structure + * + * Reads the firmware semaphore register and returns true (>0) if + * manageability is enabled, else false (0). + **/ +bool e1000_check_mng_mode_generic(struct e1000_hw *hw) +{ + u32 fwsm = E1000_READ_REG(hw, E1000_FWSM); + + DEBUGFUNC("e1000_check_mng_mode_generic"); + + + return (fwsm & E1000_FWSM_MODE_MASK) == + (E1000_MNG_IAMT_MODE << E1000_FWSM_MODE_SHIFT); +} + +/** + * e1000_enable_tx_pkt_filtering_generic - Enable packet filtering on Tx + * @hw: pointer to the HW structure + * + * Enables packet filtering on transmit packets if manageability is enabled + * and host interface is enabled. + **/ +bool e1000_enable_tx_pkt_filtering_generic(struct e1000_hw *hw) +{ + struct e1000_host_mng_dhcp_cookie *hdr = &hw->mng_cookie; + u32 *buffer = (u32 *)&hw->mng_cookie; + u32 offset; + s32 ret_val, hdr_csum, csum; + u8 i, len; + + DEBUGFUNC("e1000_enable_tx_pkt_filtering_generic"); + + hw->mac.tx_pkt_filtering = true; + + /* No manageability, no filtering */ + if (!hw->mac.ops.check_mng_mode(hw)) { + hw->mac.tx_pkt_filtering = false; + return hw->mac.tx_pkt_filtering; + } + + /* If we can't read from the host interface for whatever + * reason, disable filtering. + */ + ret_val = e1000_mng_enable_host_if_generic(hw); + if (ret_val != E1000_SUCCESS) { + hw->mac.tx_pkt_filtering = false; + return hw->mac.tx_pkt_filtering; + } + + /* Read in the header. Length and offset are in dwords. */ + len = E1000_MNG_DHCP_COOKIE_LENGTH >> 2; + offset = E1000_MNG_DHCP_COOKIE_OFFSET >> 2; + for (i = 0; i < len; i++) + *(buffer + i) = E1000_READ_REG_ARRAY_DWORD(hw, E1000_HOST_IF, + offset + i); + hdr_csum = hdr->checksum; + hdr->checksum = 0; + csum = e1000_calculate_checksum((u8 *)hdr, + E1000_MNG_DHCP_COOKIE_LENGTH); + /* If either the checksums or signature don't match, then + * the cookie area isn't considered valid, in which case we + * take the safe route of assuming Tx filtering is enabled. + */ + if ((hdr_csum != csum) || (hdr->signature != E1000_IAMT_SIGNATURE)) { + hw->mac.tx_pkt_filtering = true; + return hw->mac.tx_pkt_filtering; + } + + /* Cookie area is valid, make the final check for filtering. */ + if (!(hdr->status & E1000_MNG_DHCP_COOKIE_STATUS_PARSING)) + hw->mac.tx_pkt_filtering = false; + + return hw->mac.tx_pkt_filtering; +} + +/** + * e1000_mng_write_cmd_header_generic - Writes manageability command header + * @hw: pointer to the HW structure + * @hdr: pointer to the host interface command header + * + * Writes the command header after does the checksum calculation. + **/ +s32 e1000_mng_write_cmd_header_generic(struct e1000_hw *hw, + struct e1000_host_mng_command_header *hdr) +{ + u16 i, length = sizeof(struct e1000_host_mng_command_header); + + DEBUGFUNC("e1000_mng_write_cmd_header_generic"); + + /* Write the whole command header structure with new checksum. */ + + hdr->checksum = e1000_calculate_checksum((u8 *)hdr, length); + + length >>= 2; + /* Write the relevant command block into the ram area. */ + for (i = 0; i < length; i++) { + E1000_WRITE_REG_ARRAY_DWORD(hw, E1000_HOST_IF, i, + *((u32 *) hdr + i)); + E1000_WRITE_FLUSH(hw); + } + + return E1000_SUCCESS; +} + +/** + * e1000_mng_host_if_write_generic - Write to the manageability host interface + * @hw: pointer to the HW structure + * @buffer: pointer to the host interface buffer + * @length: size of the buffer + * @offset: location in the buffer to write to + * @sum: sum of the data (not checksum) + * + * This function writes the buffer content at the offset given on the host if. + * It also does alignment considerations to do the writes in most efficient + * way. Also fills up the sum of the buffer in *buffer parameter. + **/ +s32 e1000_mng_host_if_write_generic(struct e1000_hw *hw, u8 *buffer, + u16 length, u16 offset, u8 *sum) +{ + u8 *tmp; + u8 *bufptr = buffer; + u32 data = 0; + u16 remaining, i, j, prev_bytes; + + DEBUGFUNC("e1000_mng_host_if_write_generic"); + + /* sum = only sum of the data and it is not checksum */ + + if (length == 0 || offset + length > E1000_HI_MAX_MNG_DATA_LENGTH) + return -E1000_ERR_PARAM; + + tmp = (u8 *)&data; + prev_bytes = offset & 0x3; + offset >>= 2; + + if (prev_bytes) { + data = E1000_READ_REG_ARRAY_DWORD(hw, E1000_HOST_IF, offset); + for (j = prev_bytes; j < sizeof(u32); j++) { + *(tmp + j) = *bufptr++; + *sum += *(tmp + j); + } + E1000_WRITE_REG_ARRAY_DWORD(hw, E1000_HOST_IF, offset, data); + length -= j - prev_bytes; + offset++; + } + + remaining = length & 0x3; + length -= remaining; + + /* Calculate length in DWORDs */ + length >>= 2; + + /* The device driver writes the relevant command block into the + * ram area. + */ + for (i = 0; i < length; i++) { + for (j = 0; j < sizeof(u32); j++) { + *(tmp + j) = *bufptr++; + *sum += *(tmp + j); + } + + E1000_WRITE_REG_ARRAY_DWORD(hw, E1000_HOST_IF, offset + i, + data); + } + if (remaining) { + for (j = 0; j < sizeof(u32); j++) { + if (j < remaining) + *(tmp + j) = *bufptr++; + else + *(tmp + j) = 0; + + *sum += *(tmp + j); + } + E1000_WRITE_REG_ARRAY_DWORD(hw, E1000_HOST_IF, offset + i, + data); + } + + return E1000_SUCCESS; +} + +/** + * e1000_mng_write_dhcp_info_generic - Writes DHCP info to host interface + * @hw: pointer to the HW structure + * @buffer: pointer to the host interface + * @length: size of the buffer + * + * Writes the DHCP information to the host interface. + **/ +s32 e1000_mng_write_dhcp_info_generic(struct e1000_hw *hw, u8 *buffer, + u16 length) +{ + struct e1000_host_mng_command_header hdr; + s32 ret_val; + u32 hicr; + + DEBUGFUNC("e1000_mng_write_dhcp_info_generic"); + + hdr.command_id = E1000_MNG_DHCP_TX_PAYLOAD_CMD; + hdr.command_length = length; + hdr.reserved1 = 0; + hdr.reserved2 = 0; + hdr.checksum = 0; + + /* Enable the host interface */ + ret_val = e1000_mng_enable_host_if_generic(hw); + if (ret_val) + return ret_val; + + /* Populate the host interface with the contents of "buffer". */ + ret_val = e1000_mng_host_if_write_generic(hw, buffer, length, + sizeof(hdr), &(hdr.checksum)); + if (ret_val) + return ret_val; + + /* Write the manageability command header */ + ret_val = e1000_mng_write_cmd_header_generic(hw, &hdr); + if (ret_val) + return ret_val; + + /* Tell the ARC a new command is pending. */ + hicr = E1000_READ_REG(hw, E1000_HICR); + E1000_WRITE_REG(hw, E1000_HICR, hicr | E1000_HICR_C); + + return E1000_SUCCESS; +} + +/** + * e1000_enable_mng_pass_thru - Check if management passthrough is needed + * @hw: pointer to the HW structure + * + * Verifies the hardware needs to leave interface enabled so that frames can + * be directed to and from the management interface. + **/ +bool e1000_enable_mng_pass_thru(struct e1000_hw *hw) +{ + u32 manc; + u32 fwsm, factps; + + DEBUGFUNC("e1000_enable_mng_pass_thru"); + + if (!hw->mac.asf_firmware_present) + return false; + + manc = E1000_READ_REG(hw, E1000_MANC); + + if (!(manc & E1000_MANC_RCV_TCO_EN)) + return false; + + if (hw->mac.has_fwsm) { + fwsm = E1000_READ_REG(hw, E1000_FWSM); + factps = E1000_READ_REG(hw, E1000_FACTPS); + + if (!(factps & E1000_FACTPS_MNGCG) && + ((fwsm & E1000_FWSM_MODE_MASK) == + (e1000_mng_mode_pt << E1000_FWSM_MODE_SHIFT))) + return true; + } else if ((manc & E1000_MANC_SMBUS_EN) && + !(manc & E1000_MANC_ASF_EN)) { + return true; + } + + return false; +} + +/** + * e1000_host_interface_command - Writes buffer to host interface + * @hw: pointer to the HW structure + * @buffer: contains a command to write + * @length: the byte length of the buffer, must be multiple of 4 bytes + * + * Writes a buffer to the Host Interface. Upon success, returns E1000_SUCCESS + * else returns E1000_ERR_HOST_INTERFACE_COMMAND. + **/ +s32 e1000_host_interface_command(struct e1000_hw *hw, u8 *buffer, u32 length) +{ + u32 hicr, i; + + DEBUGFUNC("e1000_host_interface_command"); + + if (!(hw->mac.arc_subsystem_valid)) { + DEBUGOUT("Hardware doesn't support host interface command.\n"); + return E1000_SUCCESS; + } + + if (!hw->mac.asf_firmware_present) { + DEBUGOUT("Firmware is not present.\n"); + return E1000_SUCCESS; + } + + if (length == 0 || length & 0x3 || + length > E1000_HI_MAX_BLOCK_BYTE_LENGTH) { + DEBUGOUT("Buffer length failure.\n"); + return -E1000_ERR_HOST_INTERFACE_COMMAND; + } + + /* Check that the host interface is enabled. */ + hicr = E1000_READ_REG(hw, E1000_HICR); + if (!(hicr & E1000_HICR_EN)) { + DEBUGOUT("E1000_HOST_EN bit disabled.\n"); + return -E1000_ERR_HOST_INTERFACE_COMMAND; + } + + /* Calculate length in DWORDs */ + length >>= 2; + + /* The device driver writes the relevant command block + * into the ram area. + */ + for (i = 0; i < length; i++) + E1000_WRITE_REG_ARRAY_DWORD(hw, E1000_HOST_IF, i, + *((u32 *)buffer + i)); + + /* Setting this bit tells the ARC that a new command is pending. */ + E1000_WRITE_REG(hw, E1000_HICR, hicr | E1000_HICR_C); + + for (i = 0; i < E1000_HI_COMMAND_TIMEOUT; i++) { + hicr = E1000_READ_REG(hw, E1000_HICR); + if (!(hicr & E1000_HICR_C)) + break; + msec_delay(1); + } + + /* Check command successful completion. */ + if (i == E1000_HI_COMMAND_TIMEOUT || + (!(E1000_READ_REG(hw, E1000_HICR) & E1000_HICR_SV))) { + DEBUGOUT("Command has failed with no status valid.\n"); + return -E1000_ERR_HOST_INTERFACE_COMMAND; + } + + for (i = 0; i < length; i++) + *((u32 *)buffer + i) = E1000_READ_REG_ARRAY_DWORD(hw, + E1000_HOST_IF, + i); + + return E1000_SUCCESS; +} +/** + * e1000_load_firmware - Writes proxy FW code buffer to host interface + * and execute. + * @hw: pointer to the HW structure + * @buffer: contains a firmware to write + * @length: the byte length of the buffer, must be multiple of 4 bytes + * + * Upon success returns E1000_SUCCESS, returns E1000_ERR_CONFIG if not enabled + * in HW else returns E1000_ERR_HOST_INTERFACE_COMMAND. + **/ +s32 e1000_load_firmware(struct e1000_hw *hw, u8 *buffer, u32 length) +{ + u32 hicr, hibba, fwsm, icr, i; + + DEBUGFUNC("e1000_load_firmware"); + + if (hw->mac.type < e1000_i210) { + DEBUGOUT("Hardware doesn't support loading FW by the driver\n"); + return -E1000_ERR_CONFIG; + } + + /* Check that the host interface is enabled. */ + hicr = E1000_READ_REG(hw, E1000_HICR); + if (!(hicr & E1000_HICR_EN)) { + DEBUGOUT("E1000_HOST_EN bit disabled.\n"); + return -E1000_ERR_CONFIG; + } + if (!(hicr & E1000_HICR_MEMORY_BASE_EN)) { + DEBUGOUT("E1000_HICR_MEMORY_BASE_EN bit disabled.\n"); + return -E1000_ERR_CONFIG; + } + + if (length == 0 || length & 0x3 || length > E1000_HI_FW_MAX_LENGTH) { + DEBUGOUT("Buffer length failure.\n"); + return -E1000_ERR_INVALID_ARGUMENT; + } + + /* Clear notification from ROM-FW by reading ICR register */ + icr = E1000_READ_REG(hw, E1000_ICR_V2); + + /* Reset ROM-FW */ + hicr = E1000_READ_REG(hw, E1000_HICR); + hicr |= E1000_HICR_FW_RESET_ENABLE; + E1000_WRITE_REG(hw, E1000_HICR, hicr); + hicr |= E1000_HICR_FW_RESET; + E1000_WRITE_REG(hw, E1000_HICR, hicr); + E1000_WRITE_FLUSH(hw); + + /* Wait till MAC notifies about its readiness after ROM-FW reset */ + for (i = 0; i < (E1000_HI_COMMAND_TIMEOUT * 2); i++) { + icr = E1000_READ_REG(hw, E1000_ICR_V2); + if (icr & E1000_ICR_MNG) + break; + msec_delay(1); + } + + /* Check for timeout */ + if (i == E1000_HI_COMMAND_TIMEOUT) { + DEBUGOUT("FW reset failed.\n"); + return -E1000_ERR_HOST_INTERFACE_COMMAND; + } + + /* Wait till MAC is ready to accept new FW code */ + for (i = 0; i < E1000_HI_COMMAND_TIMEOUT; i++) { + fwsm = E1000_READ_REG(hw, E1000_FWSM); + if ((fwsm & E1000_FWSM_FW_VALID) && + ((fwsm & E1000_FWSM_MODE_MASK) >> E1000_FWSM_MODE_SHIFT == + E1000_FWSM_HI_EN_ONLY_MODE)) + break; + msec_delay(1); + } + + /* Check for timeout */ + if (i == E1000_HI_COMMAND_TIMEOUT) { + DEBUGOUT("FW reset failed.\n"); + return -E1000_ERR_HOST_INTERFACE_COMMAND; + } + + /* Calculate length in DWORDs */ + length >>= 2; + + /* The device driver writes the relevant FW code block + * into the ram area in DWORDs via 1kB ram addressing window. + */ + for (i = 0; i < length; i++) { + if (!(i % E1000_HI_FW_BLOCK_DWORD_LENGTH)) { + /* Point to correct 1kB ram window */ + hibba = E1000_HI_FW_BASE_ADDRESS + + ((E1000_HI_FW_BLOCK_DWORD_LENGTH << 2) * + (i / E1000_HI_FW_BLOCK_DWORD_LENGTH)); + + E1000_WRITE_REG(hw, E1000_HIBBA, hibba); + } + + E1000_WRITE_REG_ARRAY_DWORD(hw, E1000_HOST_IF, + i % E1000_HI_FW_BLOCK_DWORD_LENGTH, + *((u32 *)buffer + i)); + } + + /* Setting this bit tells the ARC that a new FW is ready to execute. */ + hicr = E1000_READ_REG(hw, E1000_HICR); + E1000_WRITE_REG(hw, E1000_HICR, hicr | E1000_HICR_C); + + for (i = 0; i < E1000_HI_COMMAND_TIMEOUT; i++) { + hicr = E1000_READ_REG(hw, E1000_HICR); + if (!(hicr & E1000_HICR_C)) + break; + msec_delay(1); + } + + /* Check for successful FW start. */ + if (i == E1000_HI_COMMAND_TIMEOUT) { + DEBUGOUT("New FW did not start within timeout period.\n"); + return -E1000_ERR_HOST_INTERFACE_COMMAND; + } + + return E1000_SUCCESS; +} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.h new file mode 100644 index 00000000..c94b2185 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_manage.h @@ -0,0 +1,89 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _E1000_MANAGE_H_ +#define _E1000_MANAGE_H_ + +bool e1000_check_mng_mode_generic(struct e1000_hw *hw); +bool e1000_enable_tx_pkt_filtering_generic(struct e1000_hw *hw); +s32 e1000_mng_enable_host_if_generic(struct e1000_hw *hw); +s32 e1000_mng_host_if_write_generic(struct e1000_hw *hw, u8 *buffer, + u16 length, u16 offset, u8 *sum); +s32 e1000_mng_write_cmd_header_generic(struct e1000_hw *hw, + struct e1000_host_mng_command_header *hdr); +s32 e1000_mng_write_dhcp_info_generic(struct e1000_hw *hw, + u8 *buffer, u16 length); +bool e1000_enable_mng_pass_thru(struct e1000_hw *hw); +u8 e1000_calculate_checksum(u8 *buffer, u32 length); +s32 e1000_host_interface_command(struct e1000_hw *hw, u8 *buffer, u32 length); +s32 e1000_load_firmware(struct e1000_hw *hw, u8 *buffer, u32 length); + +enum e1000_mng_mode { + e1000_mng_mode_none = 0, + e1000_mng_mode_asf, + e1000_mng_mode_pt, + e1000_mng_mode_ipmi, + e1000_mng_mode_host_if_only +}; + +#define E1000_FACTPS_MNGCG 0x20000000 + +#define E1000_FWSM_MODE_MASK 0xE +#define E1000_FWSM_MODE_SHIFT 1 +#define E1000_FWSM_FW_VALID 0x00008000 +#define E1000_FWSM_HI_EN_ONLY_MODE 0x4 + +#define E1000_MNG_IAMT_MODE 0x3 +#define E1000_MNG_DHCP_COOKIE_LENGTH 0x10 +#define E1000_MNG_DHCP_COOKIE_OFFSET 0x6F0 +#define E1000_MNG_DHCP_COMMAND_TIMEOUT 10 +#define E1000_MNG_DHCP_TX_PAYLOAD_CMD 64 +#define E1000_MNG_DHCP_COOKIE_STATUS_PARSING 0x1 +#define E1000_MNG_DHCP_COOKIE_STATUS_VLAN 0x2 + +#define E1000_VFTA_ENTRY_SHIFT 5 +#define E1000_VFTA_ENTRY_MASK 0x7F +#define E1000_VFTA_ENTRY_BIT_SHIFT_MASK 0x1F + +#define E1000_HI_MAX_BLOCK_BYTE_LENGTH 1792 /* Num of bytes in range */ +#define E1000_HI_MAX_BLOCK_DWORD_LENGTH 448 /* Num of dwords in range */ +#define E1000_HI_COMMAND_TIMEOUT 500 /* Process HI cmd limit */ +#define E1000_HI_FW_BASE_ADDRESS 0x10000 +#define E1000_HI_FW_MAX_LENGTH (64 * 1024) /* Num of bytes */ +#define E1000_HI_FW_BLOCK_DWORD_LENGTH 256 /* Num of DWORDs per page */ +#define E1000_HICR_MEMORY_BASE_EN 0x200 /* MB Enable bit - RO */ +#define E1000_HICR_EN 0x01 /* Enable bit - RO */ +/* Driver sets this bit when done to put command in RAM */ +#define E1000_HICR_C 0x02 +#define E1000_HICR_SV 0x04 /* Status Validity */ +#define E1000_HICR_FW_RESET_ENABLE 0x40 +#define E1000_HICR_FW_RESET 0x80 + +/* Intel(R) Active Management Technology signature */ +#define E1000_IAMT_SIGNATURE 0x544D4149 + +#endif diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.c new file mode 100644 index 00000000..3ef0d98b --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.c @@ -0,0 +1,525 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#include "e1000_mbx.h" + +/** + * e1000_null_mbx_check_for_flag - No-op function, return 0 + * @hw: pointer to the HW structure + **/ +static s32 e1000_null_mbx_check_for_flag(struct e1000_hw E1000_UNUSEDARG *hw, + u16 E1000_UNUSEDARG mbx_id) +{ + DEBUGFUNC("e1000_null_mbx_check_flag"); + + return E1000_SUCCESS; +} + +/** + * e1000_null_mbx_transact - No-op function, return 0 + * @hw: pointer to the HW structure + **/ +static s32 e1000_null_mbx_transact(struct e1000_hw E1000_UNUSEDARG *hw, + u32 E1000_UNUSEDARG *msg, + u16 E1000_UNUSEDARG size, + u16 E1000_UNUSEDARG mbx_id) +{ + DEBUGFUNC("e1000_null_mbx_rw_msg"); + + return E1000_SUCCESS; +} + +/** + * e1000_read_mbx - Reads a message from the mailbox + * @hw: pointer to the HW structure + * @msg: The message buffer + * @size: Length of buffer + * @mbx_id: id of mailbox to read + * + * returns SUCCESS if it successfully read message from buffer + **/ +s32 e1000_read_mbx(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id) +{ + struct e1000_mbx_info *mbx = &hw->mbx; + s32 ret_val = -E1000_ERR_MBX; + + DEBUGFUNC("e1000_read_mbx"); + + /* limit read to size of mailbox */ + if (size > mbx->size) + size = mbx->size; + + if (mbx->ops.read) + ret_val = mbx->ops.read(hw, msg, size, mbx_id); + + return ret_val; +} + +/** + * e1000_write_mbx - Write a message to the mailbox + * @hw: pointer to the HW structure + * @msg: The message buffer + * @size: Length of buffer + * @mbx_id: id of mailbox to write + * + * returns SUCCESS if it successfully copied message into the buffer + **/ +s32 e1000_write_mbx(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id) +{ + struct e1000_mbx_info *mbx = &hw->mbx; + s32 ret_val = E1000_SUCCESS; + + DEBUGFUNC("e1000_write_mbx"); + + if (size > mbx->size) + ret_val = -E1000_ERR_MBX; + + else if (mbx->ops.write) + ret_val = mbx->ops.write(hw, msg, size, mbx_id); + + return ret_val; +} + +/** + * e1000_check_for_msg - checks to see if someone sent us mail + * @hw: pointer to the HW structure + * @mbx_id: id of mailbox to check + * + * returns SUCCESS if the Status bit was found or else ERR_MBX + **/ +s32 e1000_check_for_msg(struct e1000_hw *hw, u16 mbx_id) +{ + struct e1000_mbx_info *mbx = &hw->mbx; + s32 ret_val = -E1000_ERR_MBX; + + DEBUGFUNC("e1000_check_for_msg"); + + if (mbx->ops.check_for_msg) + ret_val = mbx->ops.check_for_msg(hw, mbx_id); + + return ret_val; +} + +/** + * e1000_check_for_ack - checks to see if someone sent us ACK + * @hw: pointer to the HW structure + * @mbx_id: id of mailbox to check + * + * returns SUCCESS if the Status bit was found or else ERR_MBX + **/ +s32 e1000_check_for_ack(struct e1000_hw *hw, u16 mbx_id) +{ + struct e1000_mbx_info *mbx = &hw->mbx; + s32 ret_val = -E1000_ERR_MBX; + + DEBUGFUNC("e1000_check_for_ack"); + + if (mbx->ops.check_for_ack) + ret_val = mbx->ops.check_for_ack(hw, mbx_id); + + return ret_val; +} + +/** + * e1000_check_for_rst - checks to see if other side has reset + * @hw: pointer to the HW structure + * @mbx_id: id of mailbox to check + * + * returns SUCCESS if the Status bit was found or else ERR_MBX + **/ +s32 e1000_check_for_rst(struct e1000_hw *hw, u16 mbx_id) +{ + struct e1000_mbx_info *mbx = &hw->mbx; + s32 ret_val = -E1000_ERR_MBX; + + DEBUGFUNC("e1000_check_for_rst"); + + if (mbx->ops.check_for_rst) + ret_val = mbx->ops.check_for_rst(hw, mbx_id); + + return ret_val; +} + +/** + * e1000_poll_for_msg - Wait for message notification + * @hw: pointer to the HW structure + * @mbx_id: id of mailbox to write + * + * returns SUCCESS if it successfully received a message notification + **/ +static s32 e1000_poll_for_msg(struct e1000_hw *hw, u16 mbx_id) +{ + struct e1000_mbx_info *mbx = &hw->mbx; + int countdown = mbx->timeout; + + DEBUGFUNC("e1000_poll_for_msg"); + + if (!countdown || !mbx->ops.check_for_msg) + goto out; + + while (countdown && mbx->ops.check_for_msg(hw, mbx_id)) { + countdown--; + if (!countdown) + break; + usec_delay(mbx->usec_delay); + } + + /* if we failed, all future posted messages fail until reset */ + if (!countdown) + mbx->timeout = 0; +out: + return countdown ? E1000_SUCCESS : -E1000_ERR_MBX; +} + +/** + * e1000_poll_for_ack - Wait for message acknowledgement + * @hw: pointer to the HW structure + * @mbx_id: id of mailbox to write + * + * returns SUCCESS if it successfully received a message acknowledgement + **/ +static s32 e1000_poll_for_ack(struct e1000_hw *hw, u16 mbx_id) +{ + struct e1000_mbx_info *mbx = &hw->mbx; + int countdown = mbx->timeout; + + DEBUGFUNC("e1000_poll_for_ack"); + + if (!countdown || !mbx->ops.check_for_ack) + goto out; + + while (countdown && mbx->ops.check_for_ack(hw, mbx_id)) { + countdown--; + if (!countdown) + break; + usec_delay(mbx->usec_delay); + } + + /* if we failed, all future posted messages fail until reset */ + if (!countdown) + mbx->timeout = 0; +out: + return countdown ? E1000_SUCCESS : -E1000_ERR_MBX; +} + +/** + * e1000_read_posted_mbx - Wait for message notification and receive message + * @hw: pointer to the HW structure + * @msg: The message buffer + * @size: Length of buffer + * @mbx_id: id of mailbox to write + * + * returns SUCCESS if it successfully received a message notification and + * copied it into the receive buffer. + **/ +s32 e1000_read_posted_mbx(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id) +{ + struct e1000_mbx_info *mbx = &hw->mbx; + s32 ret_val = -E1000_ERR_MBX; + + DEBUGFUNC("e1000_read_posted_mbx"); + + if (!mbx->ops.read) + goto out; + + ret_val = e1000_poll_for_msg(hw, mbx_id); + + /* if ack received read message, otherwise we timed out */ + if (!ret_val) + ret_val = mbx->ops.read(hw, msg, size, mbx_id); +out: + return ret_val; +} + +/** + * e1000_write_posted_mbx - Write a message to the mailbox, wait for ack + * @hw: pointer to the HW structure + * @msg: The message buffer + * @size: Length of buffer + * @mbx_id: id of mailbox to write + * + * returns SUCCESS if it successfully copied message into the buffer and + * received an ack to that message within delay * timeout period + **/ +s32 e1000_write_posted_mbx(struct e1000_hw *hw, u32 *msg, u16 size, u16 mbx_id) +{ + struct e1000_mbx_info *mbx = &hw->mbx; + s32 ret_val = -E1000_ERR_MBX; + + DEBUGFUNC("e1000_write_posted_mbx"); + + /* exit if either we can't write or there isn't a defined timeout */ + if (!mbx->ops.write || !mbx->timeout) + goto out; + + /* send msg */ + ret_val = mbx->ops.write(hw, msg, size, mbx_id); + + /* if msg sent wait until we receive an ack */ + if (!ret_val) + ret_val = e1000_poll_for_ack(hw, mbx_id); +out: + return ret_val; +} + +/** + * e1000_init_mbx_ops_generic - Initialize mbx function pointers + * @hw: pointer to the HW structure + * + * Sets the function pointers to no-op functions + **/ +void e1000_init_mbx_ops_generic(struct e1000_hw *hw) +{ + struct e1000_mbx_info *mbx = &hw->mbx; + mbx->ops.init_params = e1000_null_ops_generic; + mbx->ops.read = e1000_null_mbx_transact; + mbx->ops.write = e1000_null_mbx_transact; + mbx->ops.check_for_msg = e1000_null_mbx_check_for_flag; + mbx->ops.check_for_ack = e1000_null_mbx_check_for_flag; + mbx->ops.check_for_rst = e1000_null_mbx_check_for_flag; + mbx->ops.read_posted = e1000_read_posted_mbx; + mbx->ops.write_posted = e1000_write_posted_mbx; +} + +static s32 e1000_check_for_bit_pf(struct e1000_hw *hw, u32 mask) +{ + u32 mbvficr = E1000_READ_REG(hw, E1000_MBVFICR); + s32 ret_val = -E1000_ERR_MBX; + + if (mbvficr & mask) { + ret_val = E1000_SUCCESS; + E1000_WRITE_REG(hw, E1000_MBVFICR, mask); + } + + return ret_val; +} + +/** + * e1000_check_for_msg_pf - checks to see if the VF has sent mail + * @hw: pointer to the HW structure + * @vf_number: the VF index + * + * returns SUCCESS if the VF has set the Status bit or else ERR_MBX + **/ +static s32 e1000_check_for_msg_pf(struct e1000_hw *hw, u16 vf_number) +{ + s32 ret_val = -E1000_ERR_MBX; + + DEBUGFUNC("e1000_check_for_msg_pf"); + + if (!e1000_check_for_bit_pf(hw, E1000_MBVFICR_VFREQ_VF1 << vf_number)) { + ret_val = E1000_SUCCESS; + hw->mbx.stats.reqs++; + } + + return ret_val; +} + +/** + * e1000_check_for_ack_pf - checks to see if the VF has ACKed + * @hw: pointer to the HW structure + * @vf_number: the VF index + * + * returns SUCCESS if the VF has set the Status bit or else ERR_MBX + **/ +static s32 e1000_check_for_ack_pf(struct e1000_hw *hw, u16 vf_number) +{ + s32 ret_val = -E1000_ERR_MBX; + + DEBUGFUNC("e1000_check_for_ack_pf"); + + if (!e1000_check_for_bit_pf(hw, E1000_MBVFICR_VFACK_VF1 << vf_number)) { + ret_val = E1000_SUCCESS; + hw->mbx.stats.acks++; + } + + return ret_val; +} + +/** + * e1000_check_for_rst_pf - checks to see if the VF has reset + * @hw: pointer to the HW structure + * @vf_number: the VF index + * + * returns SUCCESS if the VF has set the Status bit or else ERR_MBX + **/ +static s32 e1000_check_for_rst_pf(struct e1000_hw *hw, u16 vf_number) +{ + u32 vflre = E1000_READ_REG(hw, E1000_VFLRE); + s32 ret_val = -E1000_ERR_MBX; + + DEBUGFUNC("e1000_check_for_rst_pf"); + + if (vflre & (1 << vf_number)) { + ret_val = E1000_SUCCESS; + E1000_WRITE_REG(hw, E1000_VFLRE, (1 << vf_number)); + hw->mbx.stats.rsts++; + } + + return ret_val; +} + +/** + * e1000_obtain_mbx_lock_pf - obtain mailbox lock + * @hw: pointer to the HW structure + * @vf_number: the VF index + * + * return SUCCESS if we obtained the mailbox lock + **/ +static s32 e1000_obtain_mbx_lock_pf(struct e1000_hw *hw, u16 vf_number) +{ + s32 ret_val = -E1000_ERR_MBX; + u32 p2v_mailbox; + + DEBUGFUNC("e1000_obtain_mbx_lock_pf"); + + /* Take ownership of the buffer */ + E1000_WRITE_REG(hw, E1000_P2VMAILBOX(vf_number), E1000_P2VMAILBOX_PFU); + + /* reserve mailbox for vf use */ + p2v_mailbox = E1000_READ_REG(hw, E1000_P2VMAILBOX(vf_number)); + if (p2v_mailbox & E1000_P2VMAILBOX_PFU) + ret_val = E1000_SUCCESS; + + return ret_val; +} + +/** + * e1000_write_mbx_pf - Places a message in the mailbox + * @hw: pointer to the HW structure + * @msg: The message buffer + * @size: Length of buffer + * @vf_number: the VF index + * + * returns SUCCESS if it successfully copied message into the buffer + **/ +static s32 e1000_write_mbx_pf(struct e1000_hw *hw, u32 *msg, u16 size, + u16 vf_number) +{ + s32 ret_val; + u16 i; + + DEBUGFUNC("e1000_write_mbx_pf"); + + /* lock the mailbox to prevent pf/vf race condition */ + ret_val = e1000_obtain_mbx_lock_pf(hw, vf_number); + if (ret_val) + goto out_no_write; + + /* flush msg and acks as we are overwriting the message buffer */ + e1000_check_for_msg_pf(hw, vf_number); + e1000_check_for_ack_pf(hw, vf_number); + + /* copy the caller specified message to the mailbox memory buffer */ + for (i = 0; i < size; i++) + E1000_WRITE_REG_ARRAY(hw, E1000_VMBMEM(vf_number), i, msg[i]); + + /* Interrupt VF to tell it a message has been sent and release buffer*/ + E1000_WRITE_REG(hw, E1000_P2VMAILBOX(vf_number), E1000_P2VMAILBOX_STS); + + /* update stats */ + hw->mbx.stats.msgs_tx++; + +out_no_write: + return ret_val; + +} + +/** + * e1000_read_mbx_pf - Read a message from the mailbox + * @hw: pointer to the HW structure + * @msg: The message buffer + * @size: Length of buffer + * @vf_number: the VF index + * + * This function copies a message from the mailbox buffer to the caller's + * memory buffer. The presumption is that the caller knows that there was + * a message due to a VF request so no polling for message is needed. + **/ +static s32 e1000_read_mbx_pf(struct e1000_hw *hw, u32 *msg, u16 size, + u16 vf_number) +{ + s32 ret_val; + u16 i; + + DEBUGFUNC("e1000_read_mbx_pf"); + + /* lock the mailbox to prevent pf/vf race condition */ + ret_val = e1000_obtain_mbx_lock_pf(hw, vf_number); + if (ret_val) + goto out_no_read; + + /* copy the message to the mailbox memory buffer */ + for (i = 0; i < size; i++) + msg[i] = E1000_READ_REG_ARRAY(hw, E1000_VMBMEM(vf_number), i); + + /* Acknowledge the message and release buffer */ + E1000_WRITE_REG(hw, E1000_P2VMAILBOX(vf_number), E1000_P2VMAILBOX_ACK); + + /* update stats */ + hw->mbx.stats.msgs_rx++; + +out_no_read: + return ret_val; +} + +/** + * e1000_init_mbx_params_pf - set initial values for pf mailbox + * @hw: pointer to the HW structure + * + * Initializes the hw->mbx struct to correct values for pf mailbox + */ +s32 e1000_init_mbx_params_pf(struct e1000_hw *hw) +{ + struct e1000_mbx_info *mbx = &hw->mbx; + + switch (hw->mac.type) { + case e1000_82576: + case e1000_i350: + case e1000_i354: + mbx->timeout = 0; + mbx->usec_delay = 0; + + mbx->size = E1000_VFMAILBOX_SIZE; + + mbx->ops.read = e1000_read_mbx_pf; + mbx->ops.write = e1000_write_mbx_pf; + mbx->ops.read_posted = e1000_read_posted_mbx; + mbx->ops.write_posted = e1000_write_posted_mbx; + mbx->ops.check_for_msg = e1000_check_for_msg_pf; + mbx->ops.check_for_ack = e1000_check_for_ack_pf; + mbx->ops.check_for_rst = e1000_check_for_rst_pf; + + mbx->stats.msgs_tx = 0; + mbx->stats.msgs_rx = 0; + mbx->stats.reqs = 0; + mbx->stats.acks = 0; + mbx->stats.rsts = 0; + default: + return E1000_SUCCESS; + } +} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.h new file mode 100644 index 00000000..bbf838c8 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_mbx.h @@ -0,0 +1,87 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _E1000_MBX_H_ +#define _E1000_MBX_H_ + +#include "e1000_api.h" + +#define E1000_P2VMAILBOX_STS 0x00000001 /* Initiate message send to VF */ +#define E1000_P2VMAILBOX_ACK 0x00000002 /* Ack message recv'd from VF */ +#define E1000_P2VMAILBOX_VFU 0x00000004 /* VF owns the mailbox buffer */ +#define E1000_P2VMAILBOX_PFU 0x00000008 /* PF owns the mailbox buffer */ +#define E1000_P2VMAILBOX_RVFU 0x00000010 /* Reset VFU - used when VF stuck */ + +#define E1000_MBVFICR_VFREQ_MASK 0x000000FF /* bits for VF messages */ +#define E1000_MBVFICR_VFREQ_VF1 0x00000001 /* bit for VF 1 message */ +#define E1000_MBVFICR_VFACK_MASK 0x00FF0000 /* bits for VF acks */ +#define E1000_MBVFICR_VFACK_VF1 0x00010000 /* bit for VF 1 ack */ + +#define E1000_VFMAILBOX_SIZE 16 /* 16 32 bit words - 64 bytes */ + +/* If it's a E1000_VF_* msg then it originates in the VF and is sent to the + * PF. The reverse is true if it is E1000_PF_*. + * Message ACK's are the value or'd with 0xF0000000 + */ +/* Msgs below or'd with this are the ACK */ +#define E1000_VT_MSGTYPE_ACK 0x80000000 +/* Msgs below or'd with this are the NACK */ +#define E1000_VT_MSGTYPE_NACK 0x40000000 +/* Indicates that VF is still clear to send requests */ +#define E1000_VT_MSGTYPE_CTS 0x20000000 +#define E1000_VT_MSGINFO_SHIFT 16 +/* bits 23:16 are used for extra info for certain messages */ +#define E1000_VT_MSGINFO_MASK (0xFF << E1000_VT_MSGINFO_SHIFT) + +#define E1000_VF_RESET 0x01 /* VF requests reset */ +#define E1000_VF_SET_MAC_ADDR 0x02 /* VF requests to set MAC addr */ +#define E1000_VF_SET_MULTICAST 0x03 /* VF requests to set MC addr */ +#define E1000_VF_SET_MULTICAST_COUNT_MASK (0x1F << E1000_VT_MSGINFO_SHIFT) +#define E1000_VF_SET_MULTICAST_OVERFLOW (0x80 << E1000_VT_MSGINFO_SHIFT) +#define E1000_VF_SET_VLAN 0x04 /* VF requests to set VLAN */ +#define E1000_VF_SET_VLAN_ADD (0x01 << E1000_VT_MSGINFO_SHIFT) +#define E1000_VF_SET_LPE 0x05 /* reqs to set VMOLR.LPE */ +#define E1000_VF_SET_PROMISC 0x06 /* reqs to clear VMOLR.ROPE/MPME*/ +#define E1000_VF_SET_PROMISC_UNICAST (0x01 << E1000_VT_MSGINFO_SHIFT) +#define E1000_VF_SET_PROMISC_MULTICAST (0x02 << E1000_VT_MSGINFO_SHIFT) + +#define E1000_PF_CONTROL_MSG 0x0100 /* PF control message */ + +#define E1000_VF_MBX_INIT_TIMEOUT 2000 /* number of retries on mailbox */ +#define E1000_VF_MBX_INIT_DELAY 500 /* microseconds between retries */ + +s32 e1000_read_mbx(struct e1000_hw *, u32 *, u16, u16); +s32 e1000_write_mbx(struct e1000_hw *, u32 *, u16, u16); +s32 e1000_read_posted_mbx(struct e1000_hw *, u32 *, u16, u16); +s32 e1000_write_posted_mbx(struct e1000_hw *, u32 *, u16, u16); +s32 e1000_check_for_msg(struct e1000_hw *, u16); +s32 e1000_check_for_ack(struct e1000_hw *, u16); +s32 e1000_check_for_rst(struct e1000_hw *, u16); +void e1000_init_mbx_ops_generic(struct e1000_hw *hw); +s32 e1000_init_mbx_params_pf(struct e1000_hw *); + +#endif /* _E1000_MBX_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.c new file mode 100644 index 00000000..6188d007 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.c @@ -0,0 +1,965 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#include "e1000_api.h" + +static void e1000_reload_nvm_generic(struct e1000_hw *hw); + +/** + * e1000_init_nvm_ops_generic - Initialize NVM function pointers + * @hw: pointer to the HW structure + * + * Setups up the function pointers to no-op functions + **/ +void e1000_init_nvm_ops_generic(struct e1000_hw *hw) +{ + struct e1000_nvm_info *nvm = &hw->nvm; + DEBUGFUNC("e1000_init_nvm_ops_generic"); + + /* Initialize function pointers */ + nvm->ops.init_params = e1000_null_ops_generic; + nvm->ops.acquire = e1000_null_ops_generic; + nvm->ops.read = e1000_null_read_nvm; + nvm->ops.release = e1000_null_nvm_generic; + nvm->ops.reload = e1000_reload_nvm_generic; + nvm->ops.update = e1000_null_ops_generic; + nvm->ops.valid_led_default = e1000_null_led_default; + nvm->ops.validate = e1000_null_ops_generic; + nvm->ops.write = e1000_null_write_nvm; +} + +/** + * e1000_null_nvm_read - No-op function, return 0 + * @hw: pointer to the HW structure + **/ +s32 e1000_null_read_nvm(struct e1000_hw E1000_UNUSEDARG *hw, + u16 E1000_UNUSEDARG a, u16 E1000_UNUSEDARG b, + u16 E1000_UNUSEDARG *c) +{ + DEBUGFUNC("e1000_null_read_nvm"); + return E1000_SUCCESS; +} + +/** + * e1000_null_nvm_generic - No-op function, return void + * @hw: pointer to the HW structure + **/ +void e1000_null_nvm_generic(struct e1000_hw E1000_UNUSEDARG *hw) +{ + DEBUGFUNC("e1000_null_nvm_generic"); + return; +} + +/** + * e1000_null_led_default - No-op function, return 0 + * @hw: pointer to the HW structure + **/ +s32 e1000_null_led_default(struct e1000_hw E1000_UNUSEDARG *hw, + u16 E1000_UNUSEDARG *data) +{ + DEBUGFUNC("e1000_null_led_default"); + return E1000_SUCCESS; +} + +/** + * e1000_null_write_nvm - No-op function, return 0 + * @hw: pointer to the HW structure + **/ +s32 e1000_null_write_nvm(struct e1000_hw E1000_UNUSEDARG *hw, + u16 E1000_UNUSEDARG a, u16 E1000_UNUSEDARG b, + u16 E1000_UNUSEDARG *c) +{ + DEBUGFUNC("e1000_null_write_nvm"); + return E1000_SUCCESS; +} + +/** + * e1000_raise_eec_clk - Raise EEPROM clock + * @hw: pointer to the HW structure + * @eecd: pointer to the EEPROM + * + * Enable/Raise the EEPROM clock bit. + **/ +static void e1000_raise_eec_clk(struct e1000_hw *hw, u32 *eecd) +{ + *eecd = *eecd | E1000_EECD_SK; + E1000_WRITE_REG(hw, E1000_EECD, *eecd); + E1000_WRITE_FLUSH(hw); + usec_delay(hw->nvm.delay_usec); +} + +/** + * e1000_lower_eec_clk - Lower EEPROM clock + * @hw: pointer to the HW structure + * @eecd: pointer to the EEPROM + * + * Clear/Lower the EEPROM clock bit. + **/ +static void e1000_lower_eec_clk(struct e1000_hw *hw, u32 *eecd) +{ + *eecd = *eecd & ~E1000_EECD_SK; + E1000_WRITE_REG(hw, E1000_EECD, *eecd); + E1000_WRITE_FLUSH(hw); + usec_delay(hw->nvm.delay_usec); +} + +/** + * e1000_shift_out_eec_bits - Shift data bits our to the EEPROM + * @hw: pointer to the HW structure + * @data: data to send to the EEPROM + * @count: number of bits to shift out + * + * We need to shift 'count' bits out to the EEPROM. So, the value in the + * "data" parameter will be shifted out to the EEPROM one bit at a time. + * In order to do this, "data" must be broken down into bits. + **/ +static void e1000_shift_out_eec_bits(struct e1000_hw *hw, u16 data, u16 count) +{ + struct e1000_nvm_info *nvm = &hw->nvm; + u32 eecd = E1000_READ_REG(hw, E1000_EECD); + u32 mask; + + DEBUGFUNC("e1000_shift_out_eec_bits"); + + mask = 0x01 << (count - 1); + if (nvm->type == e1000_nvm_eeprom_spi) + eecd |= E1000_EECD_DO; + + do { + eecd &= ~E1000_EECD_DI; + + if (data & mask) + eecd |= E1000_EECD_DI; + + E1000_WRITE_REG(hw, E1000_EECD, eecd); + E1000_WRITE_FLUSH(hw); + + usec_delay(nvm->delay_usec); + + e1000_raise_eec_clk(hw, &eecd); + e1000_lower_eec_clk(hw, &eecd); + + mask >>= 1; + } while (mask); + + eecd &= ~E1000_EECD_DI; + E1000_WRITE_REG(hw, E1000_EECD, eecd); +} + +/** + * e1000_shift_in_eec_bits - Shift data bits in from the EEPROM + * @hw: pointer to the HW structure + * @count: number of bits to shift in + * + * In order to read a register from the EEPROM, we need to shift 'count' bits + * in from the EEPROM. Bits are "shifted in" by raising the clock input to + * the EEPROM (setting the SK bit), and then reading the value of the data out + * "DO" bit. During this "shifting in" process the data in "DI" bit should + * always be clear. + **/ +static u16 e1000_shift_in_eec_bits(struct e1000_hw *hw, u16 count) +{ + u32 eecd; + u32 i; + u16 data; + + DEBUGFUNC("e1000_shift_in_eec_bits"); + + eecd = E1000_READ_REG(hw, E1000_EECD); + + eecd &= ~(E1000_EECD_DO | E1000_EECD_DI); + data = 0; + + for (i = 0; i < count; i++) { + data <<= 1; + e1000_raise_eec_clk(hw, &eecd); + + eecd = E1000_READ_REG(hw, E1000_EECD); + + eecd &= ~E1000_EECD_DI; + if (eecd & E1000_EECD_DO) + data |= 1; + + e1000_lower_eec_clk(hw, &eecd); + } + + return data; +} + +/** + * e1000_poll_eerd_eewr_done - Poll for EEPROM read/write completion + * @hw: pointer to the HW structure + * @ee_reg: EEPROM flag for polling + * + * Polls the EEPROM status bit for either read or write completion based + * upon the value of 'ee_reg'. + **/ +s32 e1000_poll_eerd_eewr_done(struct e1000_hw *hw, int ee_reg) +{ + u32 attempts = 100000; + u32 i, reg = 0; + + DEBUGFUNC("e1000_poll_eerd_eewr_done"); + + for (i = 0; i < attempts; i++) { + if (ee_reg == E1000_NVM_POLL_READ) + reg = E1000_READ_REG(hw, E1000_EERD); + else + reg = E1000_READ_REG(hw, E1000_EEWR); + + if (reg & E1000_NVM_RW_REG_DONE) + return E1000_SUCCESS; + + usec_delay(5); + } + + return -E1000_ERR_NVM; +} + +/** + * e1000_acquire_nvm_generic - Generic request for access to EEPROM + * @hw: pointer to the HW structure + * + * Set the EEPROM access request bit and wait for EEPROM access grant bit. + * Return successful if access grant bit set, else clear the request for + * EEPROM access and return -E1000_ERR_NVM (-1). + **/ +s32 e1000_acquire_nvm_generic(struct e1000_hw *hw) +{ + u32 eecd = E1000_READ_REG(hw, E1000_EECD); + s32 timeout = E1000_NVM_GRANT_ATTEMPTS; + + DEBUGFUNC("e1000_acquire_nvm_generic"); + + E1000_WRITE_REG(hw, E1000_EECD, eecd | E1000_EECD_REQ); + eecd = E1000_READ_REG(hw, E1000_EECD); + + while (timeout) { + if (eecd & E1000_EECD_GNT) + break; + usec_delay(5); + eecd = E1000_READ_REG(hw, E1000_EECD); + timeout--; + } + + if (!timeout) { + eecd &= ~E1000_EECD_REQ; + E1000_WRITE_REG(hw, E1000_EECD, eecd); + DEBUGOUT("Could not acquire NVM grant\n"); + return -E1000_ERR_NVM; + } + + return E1000_SUCCESS; +} + +/** + * e1000_standby_nvm - Return EEPROM to standby state + * @hw: pointer to the HW structure + * + * Return the EEPROM to a standby state. + **/ +static void e1000_standby_nvm(struct e1000_hw *hw) +{ + struct e1000_nvm_info *nvm = &hw->nvm; + u32 eecd = E1000_READ_REG(hw, E1000_EECD); + + DEBUGFUNC("e1000_standby_nvm"); + + if (nvm->type == e1000_nvm_eeprom_spi) { + /* Toggle CS to flush commands */ + eecd |= E1000_EECD_CS; + E1000_WRITE_REG(hw, E1000_EECD, eecd); + E1000_WRITE_FLUSH(hw); + usec_delay(nvm->delay_usec); + eecd &= ~E1000_EECD_CS; + E1000_WRITE_REG(hw, E1000_EECD, eecd); + E1000_WRITE_FLUSH(hw); + usec_delay(nvm->delay_usec); + } +} + +/** + * e1000_stop_nvm - Terminate EEPROM command + * @hw: pointer to the HW structure + * + * Terminates the current command by inverting the EEPROM's chip select pin. + **/ +static void e1000_stop_nvm(struct e1000_hw *hw) +{ + u32 eecd; + + DEBUGFUNC("e1000_stop_nvm"); + + eecd = E1000_READ_REG(hw, E1000_EECD); + if (hw->nvm.type == e1000_nvm_eeprom_spi) { + /* Pull CS high */ + eecd |= E1000_EECD_CS; + e1000_lower_eec_clk(hw, &eecd); + } +} + +/** + * e1000_release_nvm_generic - Release exclusive access to EEPROM + * @hw: pointer to the HW structure + * + * Stop any current commands to the EEPROM and clear the EEPROM request bit. + **/ +void e1000_release_nvm_generic(struct e1000_hw *hw) +{ + u32 eecd; + + DEBUGFUNC("e1000_release_nvm_generic"); + + e1000_stop_nvm(hw); + + eecd = E1000_READ_REG(hw, E1000_EECD); + eecd &= ~E1000_EECD_REQ; + E1000_WRITE_REG(hw, E1000_EECD, eecd); +} + +/** + * e1000_ready_nvm_eeprom - Prepares EEPROM for read/write + * @hw: pointer to the HW structure + * + * Setups the EEPROM for reading and writing. + **/ +static s32 e1000_ready_nvm_eeprom(struct e1000_hw *hw) +{ + struct e1000_nvm_info *nvm = &hw->nvm; + u32 eecd = E1000_READ_REG(hw, E1000_EECD); + u8 spi_stat_reg; + + DEBUGFUNC("e1000_ready_nvm_eeprom"); + + if (nvm->type == e1000_nvm_eeprom_spi) { + u16 timeout = NVM_MAX_RETRY_SPI; + + /* Clear SK and CS */ + eecd &= ~(E1000_EECD_CS | E1000_EECD_SK); + E1000_WRITE_REG(hw, E1000_EECD, eecd); + E1000_WRITE_FLUSH(hw); + usec_delay(1); + + /* Read "Status Register" repeatedly until the LSB is cleared. + * The EEPROM will signal that the command has been completed + * by clearing bit 0 of the internal status register. If it's + * not cleared within 'timeout', then error out. + */ + while (timeout) { + e1000_shift_out_eec_bits(hw, NVM_RDSR_OPCODE_SPI, + hw->nvm.opcode_bits); + spi_stat_reg = (u8)e1000_shift_in_eec_bits(hw, 8); + if (!(spi_stat_reg & NVM_STATUS_RDY_SPI)) + break; + + usec_delay(5); + e1000_standby_nvm(hw); + timeout--; + } + + if (!timeout) { + DEBUGOUT("SPI NVM Status error\n"); + return -E1000_ERR_NVM; + } + } + + return E1000_SUCCESS; +} + +/** + * e1000_read_nvm_spi - Read EEPROM's using SPI + * @hw: pointer to the HW structure + * @offset: offset of word in the EEPROM to read + * @words: number of words to read + * @data: word read from the EEPROM + * + * Reads a 16 bit word from the EEPROM. + **/ +s32 e1000_read_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) +{ + struct e1000_nvm_info *nvm = &hw->nvm; + u32 i = 0; + s32 ret_val; + u16 word_in; + u8 read_opcode = NVM_READ_OPCODE_SPI; + + DEBUGFUNC("e1000_read_nvm_spi"); + + /* A check for invalid values: offset too large, too many words, + * and not enough words. + */ + if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || + (words == 0)) { + DEBUGOUT("nvm parameter(s) out of bounds\n"); + return -E1000_ERR_NVM; + } + + ret_val = nvm->ops.acquire(hw); + if (ret_val) + return ret_val; + + ret_val = e1000_ready_nvm_eeprom(hw); + if (ret_val) + goto release; + + e1000_standby_nvm(hw); + + if ((nvm->address_bits == 8) && (offset >= 128)) + read_opcode |= NVM_A8_OPCODE_SPI; + + /* Send the READ command (opcode + addr) */ + e1000_shift_out_eec_bits(hw, read_opcode, nvm->opcode_bits); + e1000_shift_out_eec_bits(hw, (u16)(offset*2), nvm->address_bits); + + /* Read the data. SPI NVMs increment the address with each byte + * read and will roll over if reading beyond the end. This allows + * us to read the whole NVM from any offset + */ + for (i = 0; i < words; i++) { + word_in = e1000_shift_in_eec_bits(hw, 16); + data[i] = (word_in >> 8) | (word_in << 8); + } + +release: + nvm->ops.release(hw); + + return ret_val; +} + +/** + * e1000_read_nvm_eerd - Reads EEPROM using EERD register + * @hw: pointer to the HW structure + * @offset: offset of word in the EEPROM to read + * @words: number of words to read + * @data: word read from the EEPROM + * + * Reads a 16 bit word from the EEPROM using the EERD register. + **/ +s32 e1000_read_nvm_eerd(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) +{ + struct e1000_nvm_info *nvm = &hw->nvm; + u32 i, eerd = 0; + s32 ret_val = E1000_SUCCESS; + + DEBUGFUNC("e1000_read_nvm_eerd"); + + /* A check for invalid values: offset too large, too many words, + * too many words for the offset, and not enough words. + */ + if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || + (words == 0)) { + DEBUGOUT("nvm parameter(s) out of bounds\n"); + return -E1000_ERR_NVM; + } + + for (i = 0; i < words; i++) { + eerd = ((offset+i) << E1000_NVM_RW_ADDR_SHIFT) + + E1000_NVM_RW_REG_START; + + E1000_WRITE_REG(hw, E1000_EERD, eerd); + ret_val = e1000_poll_eerd_eewr_done(hw, E1000_NVM_POLL_READ); + if (ret_val) + break; + + data[i] = (E1000_READ_REG(hw, E1000_EERD) >> + E1000_NVM_RW_REG_DATA); + } + + return ret_val; +} + +/** + * e1000_write_nvm_spi - Write to EEPROM using SPI + * @hw: pointer to the HW structure + * @offset: offset within the EEPROM to be written to + * @words: number of words to write + * @data: 16 bit word(s) to be written to the EEPROM + * + * Writes data to EEPROM at offset using SPI interface. + * + * If e1000_update_nvm_checksum is not called after this function , the + * EEPROM will most likely contain an invalid checksum. + **/ +s32 e1000_write_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data) +{ + struct e1000_nvm_info *nvm = &hw->nvm; + s32 ret_val = -E1000_ERR_NVM; + u16 widx = 0; + + DEBUGFUNC("e1000_write_nvm_spi"); + + /* A check for invalid values: offset too large, too many words, + * and not enough words. + */ + if ((offset >= nvm->word_size) || (words > (nvm->word_size - offset)) || + (words == 0)) { + DEBUGOUT("nvm parameter(s) out of bounds\n"); + return -E1000_ERR_NVM; + } + + while (widx < words) { + u8 write_opcode = NVM_WRITE_OPCODE_SPI; + + ret_val = nvm->ops.acquire(hw); + if (ret_val) + return ret_val; + + ret_val = e1000_ready_nvm_eeprom(hw); + if (ret_val) { + nvm->ops.release(hw); + return ret_val; + } + + e1000_standby_nvm(hw); + + /* Send the WRITE ENABLE command (8 bit opcode) */ + e1000_shift_out_eec_bits(hw, NVM_WREN_OPCODE_SPI, + nvm->opcode_bits); + + e1000_standby_nvm(hw); + + /* Some SPI eeproms use the 8th address bit embedded in the + * opcode + */ + if ((nvm->address_bits == 8) && (offset >= 128)) + write_opcode |= NVM_A8_OPCODE_SPI; + + /* Send the Write command (8-bit opcode + addr) */ + e1000_shift_out_eec_bits(hw, write_opcode, nvm->opcode_bits); + e1000_shift_out_eec_bits(hw, (u16)((offset + widx) * 2), + nvm->address_bits); + + /* Loop to allow for up to whole page write of eeprom */ + while (widx < words) { + u16 word_out = data[widx]; + word_out = (word_out >> 8) | (word_out << 8); + e1000_shift_out_eec_bits(hw, word_out, 16); + widx++; + + if ((((offset + widx) * 2) % nvm->page_size) == 0) { + e1000_standby_nvm(hw); + break; + } + } + msec_delay(10); + nvm->ops.release(hw); + } + + return ret_val; +} + +/** + * e1000_read_pba_string_generic - Read device part number + * @hw: pointer to the HW structure + * @pba_num: pointer to device part number + * @pba_num_size: size of part number buffer + * + * Reads the product board assembly (PBA) number from the EEPROM and stores + * the value in pba_num. + **/ +s32 e1000_read_pba_string_generic(struct e1000_hw *hw, u8 *pba_num, + u32 pba_num_size) +{ + s32 ret_val; + u16 nvm_data; + u16 pba_ptr; + u16 offset; + u16 length; + + DEBUGFUNC("e1000_read_pba_string_generic"); + + if (pba_num == NULL) { + DEBUGOUT("PBA string buffer was null\n"); + return -E1000_ERR_INVALID_ARGUMENT; + } + + ret_val = hw->nvm.ops.read(hw, NVM_PBA_OFFSET_0, 1, &nvm_data); + if (ret_val) { + DEBUGOUT("NVM Read Error\n"); + return ret_val; + } + + ret_val = hw->nvm.ops.read(hw, NVM_PBA_OFFSET_1, 1, &pba_ptr); + if (ret_val) { + DEBUGOUT("NVM Read Error\n"); + return ret_val; + } + + /* if nvm_data is not ptr guard the PBA must be in legacy format which + * means pba_ptr is actually our second data word for the PBA number + * and we can decode it into an ascii string + */ + if (nvm_data != NVM_PBA_PTR_GUARD) { + DEBUGOUT("NVM PBA number is not stored as string\n"); + + /* make sure callers buffer is big enough to store the PBA */ + if (pba_num_size < E1000_PBANUM_LENGTH) { + DEBUGOUT("PBA string buffer too small\n"); + return E1000_ERR_NO_SPACE; + } + + /* extract hex string from data and pba_ptr */ + pba_num[0] = (nvm_data >> 12) & 0xF; + pba_num[1] = (nvm_data >> 8) & 0xF; + pba_num[2] = (nvm_data >> 4) & 0xF; + pba_num[3] = nvm_data & 0xF; + pba_num[4] = (pba_ptr >> 12) & 0xF; + pba_num[5] = (pba_ptr >> 8) & 0xF; + pba_num[6] = '-'; + pba_num[7] = 0; + pba_num[8] = (pba_ptr >> 4) & 0xF; + pba_num[9] = pba_ptr & 0xF; + + /* put a null character on the end of our string */ + pba_num[10] = '\0'; + + /* switch all the data but the '-' to hex char */ + for (offset = 0; offset < 10; offset++) { + if (pba_num[offset] < 0xA) + pba_num[offset] += '0'; + else if (pba_num[offset] < 0x10) + pba_num[offset] += 'A' - 0xA; + } + + return E1000_SUCCESS; + } + + ret_val = hw->nvm.ops.read(hw, pba_ptr, 1, &length); + if (ret_val) { + DEBUGOUT("NVM Read Error\n"); + return ret_val; + } + + if (length == 0xFFFF || length == 0) { + DEBUGOUT("NVM PBA number section invalid length\n"); + return -E1000_ERR_NVM_PBA_SECTION; + } + /* check if pba_num buffer is big enough */ + if (pba_num_size < (((u32)length * 2) - 1)) { + DEBUGOUT("PBA string buffer too small\n"); + return -E1000_ERR_NO_SPACE; + } + + /* trim pba length from start of string */ + pba_ptr++; + length--; + + for (offset = 0; offset < length; offset++) { + ret_val = hw->nvm.ops.read(hw, pba_ptr + offset, 1, &nvm_data); + if (ret_val) { + DEBUGOUT("NVM Read Error\n"); + return ret_val; + } + pba_num[offset * 2] = (u8)(nvm_data >> 8); + pba_num[(offset * 2) + 1] = (u8)(nvm_data & 0xFF); + } + pba_num[offset * 2] = '\0'; + + return E1000_SUCCESS; +} + +/** + * e1000_read_pba_length_generic - Read device part number length + * @hw: pointer to the HW structure + * @pba_num_size: size of part number buffer + * + * Reads the product board assembly (PBA) number length from the EEPROM and + * stores the value in pba_num_size. + **/ +s32 e1000_read_pba_length_generic(struct e1000_hw *hw, u32 *pba_num_size) +{ + s32 ret_val; + u16 nvm_data; + u16 pba_ptr; + u16 length; + + DEBUGFUNC("e1000_read_pba_length_generic"); + + if (pba_num_size == NULL) { + DEBUGOUT("PBA buffer size was null\n"); + return -E1000_ERR_INVALID_ARGUMENT; + } + + ret_val = hw->nvm.ops.read(hw, NVM_PBA_OFFSET_0, 1, &nvm_data); + if (ret_val) { + DEBUGOUT("NVM Read Error\n"); + return ret_val; + } + + ret_val = hw->nvm.ops.read(hw, NVM_PBA_OFFSET_1, 1, &pba_ptr); + if (ret_val) { + DEBUGOUT("NVM Read Error\n"); + return ret_val; + } + + /* if data is not ptr guard the PBA must be in legacy format */ + if (nvm_data != NVM_PBA_PTR_GUARD) { + *pba_num_size = E1000_PBANUM_LENGTH; + return E1000_SUCCESS; + } + + ret_val = hw->nvm.ops.read(hw, pba_ptr, 1, &length); + if (ret_val) { + DEBUGOUT("NVM Read Error\n"); + return ret_val; + } + + if (length == 0xFFFF || length == 0) { + DEBUGOUT("NVM PBA number section invalid length\n"); + return -E1000_ERR_NVM_PBA_SECTION; + } + + /* Convert from length in u16 values to u8 chars, add 1 for NULL, + * and subtract 2 because length field is included in length. + */ + *pba_num_size = ((u32)length * 2) - 1; + + return E1000_SUCCESS; +} + + + + + +/** + * e1000_read_mac_addr_generic - Read device MAC address + * @hw: pointer to the HW structure + * + * Reads the device MAC address from the EEPROM and stores the value. + * Since devices with two ports use the same EEPROM, we increment the + * last bit in the MAC address for the second port. + **/ +s32 e1000_read_mac_addr_generic(struct e1000_hw *hw) +{ + u32 rar_high; + u32 rar_low; + u16 i; + + rar_high = E1000_READ_REG(hw, E1000_RAH(0)); + rar_low = E1000_READ_REG(hw, E1000_RAL(0)); + + for (i = 0; i < E1000_RAL_MAC_ADDR_LEN; i++) + hw->mac.perm_addr[i] = (u8)(rar_low >> (i*8)); + + for (i = 0; i < E1000_RAH_MAC_ADDR_LEN; i++) + hw->mac.perm_addr[i+4] = (u8)(rar_high >> (i*8)); + + for (i = 0; i < ETH_ADDR_LEN; i++) + hw->mac.addr[i] = hw->mac.perm_addr[i]; + + return E1000_SUCCESS; +} + +/** + * e1000_validate_nvm_checksum_generic - Validate EEPROM checksum + * @hw: pointer to the HW structure + * + * Calculates the EEPROM checksum by reading/adding each word of the EEPROM + * and then verifies that the sum of the EEPROM is equal to 0xBABA. + **/ +s32 e1000_validate_nvm_checksum_generic(struct e1000_hw *hw) +{ + s32 ret_val; + u16 checksum = 0; + u16 i, nvm_data; + + DEBUGFUNC("e1000_validate_nvm_checksum_generic"); + + for (i = 0; i < (NVM_CHECKSUM_REG + 1); i++) { + ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data); + if (ret_val) { + DEBUGOUT("NVM Read Error\n"); + return ret_val; + } + checksum += nvm_data; + } + + if (checksum != (u16) NVM_SUM) { + DEBUGOUT("NVM Checksum Invalid\n"); + return -E1000_ERR_NVM; + } + + return E1000_SUCCESS; +} + +/** + * e1000_update_nvm_checksum_generic - Update EEPROM checksum + * @hw: pointer to the HW structure + * + * Updates the EEPROM checksum by reading/adding each word of the EEPROM + * up to the checksum. Then calculates the EEPROM checksum and writes the + * value to the EEPROM. + **/ +s32 e1000_update_nvm_checksum_generic(struct e1000_hw *hw) +{ + s32 ret_val; + u16 checksum = 0; + u16 i, nvm_data; + + DEBUGFUNC("e1000_update_nvm_checksum"); + + for (i = 0; i < NVM_CHECKSUM_REG; i++) { + ret_val = hw->nvm.ops.read(hw, i, 1, &nvm_data); + if (ret_val) { + DEBUGOUT("NVM Read Error while updating checksum.\n"); + return ret_val; + } + checksum += nvm_data; + } + checksum = (u16) NVM_SUM - checksum; + ret_val = hw->nvm.ops.write(hw, NVM_CHECKSUM_REG, 1, &checksum); + if (ret_val) + DEBUGOUT("NVM Write Error while updating checksum.\n"); + + return ret_val; +} + +/** + * e1000_reload_nvm_generic - Reloads EEPROM + * @hw: pointer to the HW structure + * + * Reloads the EEPROM by setting the "Reinitialize from EEPROM" bit in the + * extended control register. + **/ +static void e1000_reload_nvm_generic(struct e1000_hw *hw) +{ + u32 ctrl_ext; + + DEBUGFUNC("e1000_reload_nvm_generic"); + + usec_delay(10); + ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); + ctrl_ext |= E1000_CTRL_EXT_EE_RST; + E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); + E1000_WRITE_FLUSH(hw); +} + +/** + * e1000_get_fw_version - Get firmware version information + * @hw: pointer to the HW structure + * @fw_vers: pointer to output version structure + * + * unsupported/not present features return 0 in version structure + **/ +void e1000_get_fw_version(struct e1000_hw *hw, struct e1000_fw_version *fw_vers) +{ + u16 eeprom_verh, eeprom_verl, etrack_test, fw_version; + u8 q, hval, rem, result; + u16 comb_verh, comb_verl, comb_offset; + + memset(fw_vers, 0, sizeof(struct e1000_fw_version)); + + /* basic eeprom version numbers, bits used vary by part and by tool + * used to create the nvm images */ + /* Check which data format we have */ + hw->nvm.ops.read(hw, NVM_ETRACK_HIWORD, 1, &etrack_test); + switch (hw->mac.type) { + case e1000_i211: + e1000_read_invm_version(hw, fw_vers); + return; + case e1000_82575: + case e1000_82576: + case e1000_82580: + /* Use this format, unless EETRACK ID exists, + * then use alternate format + */ + if ((etrack_test & NVM_MAJOR_MASK) != NVM_ETRACK_VALID) { + hw->nvm.ops.read(hw, NVM_VERSION, 1, &fw_version); + fw_vers->eep_major = (fw_version & NVM_MAJOR_MASK) + >> NVM_MAJOR_SHIFT; + fw_vers->eep_minor = (fw_version & NVM_MINOR_MASK) + >> NVM_MINOR_SHIFT; + fw_vers->eep_build = (fw_version & NVM_IMAGE_ID_MASK); + goto etrack_id; + } + break; + case e1000_i210: + if (!(e1000_get_flash_presence_i210(hw))) { + e1000_read_invm_version(hw, fw_vers); + return; + } + /* fall through */ + case e1000_i350: + case e1000_i354: + /* find combo image version */ + hw->nvm.ops.read(hw, NVM_COMB_VER_PTR, 1, &comb_offset); + if ((comb_offset != 0x0) && + (comb_offset != NVM_VER_INVALID)) { + + hw->nvm.ops.read(hw, (NVM_COMB_VER_OFF + comb_offset + + 1), 1, &comb_verh); + hw->nvm.ops.read(hw, (NVM_COMB_VER_OFF + comb_offset), + 1, &comb_verl); + + /* get Option Rom version if it exists and is valid */ + if ((comb_verh && comb_verl) && + ((comb_verh != NVM_VER_INVALID) && + (comb_verl != NVM_VER_INVALID))) { + + fw_vers->or_valid = true; + fw_vers->or_major = + comb_verl >> NVM_COMB_VER_SHFT; + fw_vers->or_build = + (comb_verl << NVM_COMB_VER_SHFT) + | (comb_verh >> NVM_COMB_VER_SHFT); + fw_vers->or_patch = + comb_verh & NVM_COMB_VER_MASK; + } + } + break; + default: + return; + } + hw->nvm.ops.read(hw, NVM_VERSION, 1, &fw_version); + fw_vers->eep_major = (fw_version & NVM_MAJOR_MASK) + >> NVM_MAJOR_SHIFT; + + /* check for old style version format in newer images*/ + if ((fw_version & NVM_NEW_DEC_MASK) == 0x0) { + eeprom_verl = (fw_version & NVM_COMB_VER_MASK); + } else { + eeprom_verl = (fw_version & NVM_MINOR_MASK) + >> NVM_MINOR_SHIFT; + } + /* Convert minor value to hex before assigning to output struct + * Val to be converted will not be higher than 99, per tool output + */ + q = eeprom_verl / NVM_HEX_CONV; + hval = q * NVM_HEX_TENS; + rem = eeprom_verl % NVM_HEX_CONV; + result = hval + rem; + fw_vers->eep_minor = result; + +etrack_id: + if ((etrack_test & NVM_MAJOR_MASK) == NVM_ETRACK_VALID) { + hw->nvm.ops.read(hw, NVM_ETRACK_WORD, 1, &eeprom_verl); + hw->nvm.ops.read(hw, (NVM_ETRACK_WORD + 1), 1, &eeprom_verh); + fw_vers->etrack_id = (eeprom_verh << NVM_ETRACK_SHIFT) + | eeprom_verl; + } + return; +} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.h new file mode 100644 index 00000000..fe62785a --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_nvm.h @@ -0,0 +1,75 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _E1000_NVM_H_ +#define _E1000_NVM_H_ + + +struct e1000_fw_version { + u32 etrack_id; + u16 eep_major; + u16 eep_minor; + u16 eep_build; + + u8 invm_major; + u8 invm_minor; + u8 invm_img_type; + + bool or_valid; + u16 or_major; + u16 or_build; + u16 or_patch; +}; + + +void e1000_init_nvm_ops_generic(struct e1000_hw *hw); +s32 e1000_null_read_nvm(struct e1000_hw *hw, u16 a, u16 b, u16 *c); +void e1000_null_nvm_generic(struct e1000_hw *hw); +s32 e1000_null_led_default(struct e1000_hw *hw, u16 *data); +s32 e1000_null_write_nvm(struct e1000_hw *hw, u16 a, u16 b, u16 *c); +s32 e1000_acquire_nvm_generic(struct e1000_hw *hw); + +s32 e1000_poll_eerd_eewr_done(struct e1000_hw *hw, int ee_reg); +s32 e1000_read_mac_addr_generic(struct e1000_hw *hw); +s32 e1000_read_pba_string_generic(struct e1000_hw *hw, u8 *pba_num, + u32 pba_num_size); +s32 e1000_read_pba_length_generic(struct e1000_hw *hw, u32 *pba_num_size); +s32 e1000_read_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, u16 *data); +s32 e1000_read_nvm_eerd(struct e1000_hw *hw, u16 offset, u16 words, + u16 *data); +s32 e1000_valid_led_default_generic(struct e1000_hw *hw, u16 *data); +s32 e1000_validate_nvm_checksum_generic(struct e1000_hw *hw); +s32 e1000_write_nvm_spi(struct e1000_hw *hw, u16 offset, u16 words, + u16 *data); +s32 e1000_update_nvm_checksum_generic(struct e1000_hw *hw); +void e1000_release_nvm_generic(struct e1000_hw *hw); +void e1000_get_fw_version(struct e1000_hw *hw, + struct e1000_fw_version *fw_vers); + +#define E1000_STM_OPCODE 0xDB00 + +#endif diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_osdep.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_osdep.h new file mode 100644 index 00000000..d1cf98e2 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_osdep.h @@ -0,0 +1,136 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + + +/* glue for the OS independent part of e1000 + * includes register access macros + */ + +#ifndef _E1000_OSDEP_H_ +#define _E1000_OSDEP_H_ + +#include +#include +#include +#include +#include +#include "kcompat.h" + +#ifndef __INTEL_COMPILER +#pragma GCC diagnostic ignored "-Wunused-function" +#endif + +#define usec_delay(x) udelay(x) +#define usec_delay_irq(x) udelay(x) +#ifndef msec_delay +#define msec_delay(x) do { \ + /* Don't mdelay in interrupt context! */ \ + if (in_interrupt()) \ + BUG(); \ + else \ + msleep(x); \ +} while (0) + +/* Some workarounds require millisecond delays and are run during interrupt + * context. Most notably, when establishing link, the phy may need tweaking + * but cannot process phy register reads/writes faster than millisecond + * intervals...and we establish link due to a "link status change" interrupt. + */ +#define msec_delay_irq(x) mdelay(x) +#endif + +#define PCI_COMMAND_REGISTER PCI_COMMAND +#define CMD_MEM_WRT_INVALIDATE PCI_COMMAND_INVALIDATE +#define ETH_ADDR_LEN ETH_ALEN + +#ifdef __BIG_ENDIAN +#define E1000_BIG_ENDIAN __BIG_ENDIAN +#endif + + +#ifdef DEBUG +#define DEBUGOUT(S) printk(KERN_DEBUG S) +#define DEBUGOUT1(S, A...) printk(KERN_DEBUG S, ## A) +#else +#define DEBUGOUT(S) +#define DEBUGOUT1(S, A...) +#endif + +#ifdef DEBUG_FUNC +#define DEBUGFUNC(F) DEBUGOUT(F "\n") +#else +#define DEBUGFUNC(F) +#endif +#define DEBUGOUT2 DEBUGOUT1 +#define DEBUGOUT3 DEBUGOUT2 +#define DEBUGOUT7 DEBUGOUT3 + +#define E1000_REGISTER(a, reg) reg + +#define E1000_WRITE_REG(a, reg, value) ( \ + writel((value), ((a)->hw_addr + E1000_REGISTER(a, reg)))) + +#define E1000_READ_REG(a, reg) (readl((a)->hw_addr + E1000_REGISTER(a, reg))) + +#define E1000_WRITE_REG_ARRAY(a, reg, offset, value) ( \ + writel((value), ((a)->hw_addr + E1000_REGISTER(a, reg) + ((offset) << 2)))) + +#define E1000_READ_REG_ARRAY(a, reg, offset) ( \ + readl((a)->hw_addr + E1000_REGISTER(a, reg) + ((offset) << 2))) + +#define E1000_READ_REG_ARRAY_DWORD E1000_READ_REG_ARRAY +#define E1000_WRITE_REG_ARRAY_DWORD E1000_WRITE_REG_ARRAY + +#define E1000_WRITE_REG_ARRAY_WORD(a, reg, offset, value) ( \ + writew((value), ((a)->hw_addr + E1000_REGISTER(a, reg) + ((offset) << 1)))) + +#define E1000_READ_REG_ARRAY_WORD(a, reg, offset) ( \ + readw((a)->hw_addr + E1000_REGISTER(a, reg) + ((offset) << 1))) + +#define E1000_WRITE_REG_ARRAY_BYTE(a, reg, offset, value) ( \ + writeb((value), ((a)->hw_addr + E1000_REGISTER(a, reg) + (offset)))) + +#define E1000_READ_REG_ARRAY_BYTE(a, reg, offset) ( \ + readb((a)->hw_addr + E1000_REGISTER(a, reg) + (offset))) + +#define E1000_WRITE_REG_IO(a, reg, offset) do { \ + outl(reg, ((a)->io_base)); \ + outl(offset, ((a)->io_base + 4)); } while (0) + +#define E1000_WRITE_FLUSH(a) E1000_READ_REG(a, E1000_STATUS) + +#define E1000_WRITE_FLASH_REG(a, reg, value) ( \ + writel((value), ((a)->flash_address + reg))) + +#define E1000_WRITE_FLASH_REG16(a, reg, value) ( \ + writew((value), ((a)->flash_address + reg))) + +#define E1000_READ_FLASH_REG(a, reg) (readl((a)->flash_address + reg)) + +#define E1000_READ_FLASH_REG16(a, reg) (readw((a)->flash_address + reg)) + +#endif /* _E1000_OSDEP_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c new file mode 100644 index 00000000..df224702 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.c @@ -0,0 +1,3405 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#include "e1000_api.h" + +static s32 e1000_wait_autoneg(struct e1000_hw *hw); +/* Cable length tables */ +static const u16 e1000_m88_cable_length_table[] = { + 0, 50, 80, 110, 140, 140, E1000_CABLE_LENGTH_UNDEFINED }; +#define M88E1000_CABLE_LENGTH_TABLE_SIZE \ + (sizeof(e1000_m88_cable_length_table) / \ + sizeof(e1000_m88_cable_length_table[0])) + +static const u16 e1000_igp_2_cable_length_table[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 3, 5, 8, 11, 13, 16, 18, 21, 0, 0, 0, 3, + 6, 10, 13, 16, 19, 23, 26, 29, 32, 35, 38, 41, 6, 10, 14, 18, 22, + 26, 30, 33, 37, 41, 44, 48, 51, 54, 58, 61, 21, 26, 31, 35, 40, + 44, 49, 53, 57, 61, 65, 68, 72, 75, 79, 82, 40, 45, 51, 56, 61, + 66, 70, 75, 79, 83, 87, 91, 94, 98, 101, 104, 60, 66, 72, 77, 82, + 87, 92, 96, 100, 104, 108, 111, 114, 117, 119, 121, 83, 89, 95, + 100, 105, 109, 113, 116, 119, 122, 124, 104, 109, 114, 118, 121, + 124}; +#define IGP02E1000_CABLE_LENGTH_TABLE_SIZE \ + (sizeof(e1000_igp_2_cable_length_table) / \ + sizeof(e1000_igp_2_cable_length_table[0])) + +/** + * e1000_init_phy_ops_generic - Initialize PHY function pointers + * @hw: pointer to the HW structure + * + * Setups up the function pointers to no-op functions + **/ +void e1000_init_phy_ops_generic(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + DEBUGFUNC("e1000_init_phy_ops_generic"); + + /* Initialize function pointers */ + phy->ops.init_params = e1000_null_ops_generic; + phy->ops.acquire = e1000_null_ops_generic; + phy->ops.check_polarity = e1000_null_ops_generic; + phy->ops.check_reset_block = e1000_null_ops_generic; + phy->ops.commit = e1000_null_ops_generic; + phy->ops.force_speed_duplex = e1000_null_ops_generic; + phy->ops.get_cfg_done = e1000_null_ops_generic; + phy->ops.get_cable_length = e1000_null_ops_generic; + phy->ops.get_info = e1000_null_ops_generic; + phy->ops.set_page = e1000_null_set_page; + phy->ops.read_reg = e1000_null_read_reg; + phy->ops.read_reg_locked = e1000_null_read_reg; + phy->ops.read_reg_page = e1000_null_read_reg; + phy->ops.release = e1000_null_phy_generic; + phy->ops.reset = e1000_null_ops_generic; + phy->ops.set_d0_lplu_state = e1000_null_lplu_state; + phy->ops.set_d3_lplu_state = e1000_null_lplu_state; + phy->ops.write_reg = e1000_null_write_reg; + phy->ops.write_reg_locked = e1000_null_write_reg; + phy->ops.write_reg_page = e1000_null_write_reg; + phy->ops.power_up = e1000_null_phy_generic; + phy->ops.power_down = e1000_null_phy_generic; + phy->ops.read_i2c_byte = e1000_read_i2c_byte_null; + phy->ops.write_i2c_byte = e1000_write_i2c_byte_null; +} + +/** + * e1000_null_set_page - No-op function, return 0 + * @hw: pointer to the HW structure + **/ +s32 e1000_null_set_page(struct e1000_hw E1000_UNUSEDARG *hw, + u16 E1000_UNUSEDARG data) +{ + DEBUGFUNC("e1000_null_set_page"); + return E1000_SUCCESS; +} + +/** + * e1000_null_read_reg - No-op function, return 0 + * @hw: pointer to the HW structure + **/ +s32 e1000_null_read_reg(struct e1000_hw E1000_UNUSEDARG *hw, + u32 E1000_UNUSEDARG offset, u16 E1000_UNUSEDARG *data) +{ + DEBUGFUNC("e1000_null_read_reg"); + return E1000_SUCCESS; +} + +/** + * e1000_null_phy_generic - No-op function, return void + * @hw: pointer to the HW structure + **/ +void e1000_null_phy_generic(struct e1000_hw E1000_UNUSEDARG *hw) +{ + DEBUGFUNC("e1000_null_phy_generic"); + return; +} + +/** + * e1000_null_lplu_state - No-op function, return 0 + * @hw: pointer to the HW structure + **/ +s32 e1000_null_lplu_state(struct e1000_hw E1000_UNUSEDARG *hw, + bool E1000_UNUSEDARG active) +{ + DEBUGFUNC("e1000_null_lplu_state"); + return E1000_SUCCESS; +} + +/** + * e1000_null_write_reg - No-op function, return 0 + * @hw: pointer to the HW structure + **/ +s32 e1000_null_write_reg(struct e1000_hw E1000_UNUSEDARG *hw, + u32 E1000_UNUSEDARG offset, u16 E1000_UNUSEDARG data) +{ + DEBUGFUNC("e1000_null_write_reg"); + return E1000_SUCCESS; +} + +/** + * e1000_read_i2c_byte_null - No-op function, return 0 + * @hw: pointer to hardware structure + * @byte_offset: byte offset to write + * @dev_addr: device address + * @data: data value read + * + **/ +s32 e1000_read_i2c_byte_null(struct e1000_hw E1000_UNUSEDARG *hw, + u8 E1000_UNUSEDARG byte_offset, + u8 E1000_UNUSEDARG dev_addr, + u8 E1000_UNUSEDARG *data) +{ + DEBUGFUNC("e1000_read_i2c_byte_null"); + return E1000_SUCCESS; +} + +/** + * e1000_write_i2c_byte_null - No-op function, return 0 + * @hw: pointer to hardware structure + * @byte_offset: byte offset to write + * @dev_addr: device address + * @data: data value to write + * + **/ +s32 e1000_write_i2c_byte_null(struct e1000_hw E1000_UNUSEDARG *hw, + u8 E1000_UNUSEDARG byte_offset, + u8 E1000_UNUSEDARG dev_addr, + u8 E1000_UNUSEDARG data) +{ + DEBUGFUNC("e1000_write_i2c_byte_null"); + return E1000_SUCCESS; +} + +/** + * e1000_check_reset_block_generic - Check if PHY reset is blocked + * @hw: pointer to the HW structure + * + * Read the PHY management control register and check whether a PHY reset + * is blocked. If a reset is not blocked return E1000_SUCCESS, otherwise + * return E1000_BLK_PHY_RESET (12). + **/ +s32 e1000_check_reset_block_generic(struct e1000_hw *hw) +{ + u32 manc; + + DEBUGFUNC("e1000_check_reset_block"); + + manc = E1000_READ_REG(hw, E1000_MANC); + + return (manc & E1000_MANC_BLK_PHY_RST_ON_IDE) ? + E1000_BLK_PHY_RESET : E1000_SUCCESS; +} + +/** + * e1000_get_phy_id - Retrieve the PHY ID and revision + * @hw: pointer to the HW structure + * + * Reads the PHY registers and stores the PHY ID and possibly the PHY + * revision in the hardware structure. + **/ +s32 e1000_get_phy_id(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val = E1000_SUCCESS; + u16 phy_id; + + DEBUGFUNC("e1000_get_phy_id"); + + if (!phy->ops.read_reg) + return E1000_SUCCESS; + + ret_val = phy->ops.read_reg(hw, PHY_ID1, &phy_id); + if (ret_val) + return ret_val; + + phy->id = (u32)(phy_id << 16); + usec_delay(20); + ret_val = phy->ops.read_reg(hw, PHY_ID2, &phy_id); + if (ret_val) + return ret_val; + + phy->id |= (u32)(phy_id & PHY_REVISION_MASK); + phy->revision = (u32)(phy_id & ~PHY_REVISION_MASK); + + + return E1000_SUCCESS; +} + +/** + * e1000_phy_reset_dsp_generic - Reset PHY DSP + * @hw: pointer to the HW structure + * + * Reset the digital signal processor. + **/ +s32 e1000_phy_reset_dsp_generic(struct e1000_hw *hw) +{ + s32 ret_val; + + DEBUGFUNC("e1000_phy_reset_dsp_generic"); + + if (!hw->phy.ops.write_reg) + return E1000_SUCCESS; + + ret_val = hw->phy.ops.write_reg(hw, M88E1000_PHY_GEN_CONTROL, 0xC1); + if (ret_val) + return ret_val; + + return hw->phy.ops.write_reg(hw, M88E1000_PHY_GEN_CONTROL, 0); +} + +/** + * e1000_read_phy_reg_mdic - Read MDI control register + * @hw: pointer to the HW structure + * @offset: register offset to be read + * @data: pointer to the read data + * + * Reads the MDI control register in the PHY at offset and stores the + * information read to data. + **/ +s32 e1000_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data) +{ + struct e1000_phy_info *phy = &hw->phy; + u32 i, mdic = 0; + + DEBUGFUNC("e1000_read_phy_reg_mdic"); + + if (offset > MAX_PHY_REG_ADDRESS) { + DEBUGOUT1("PHY Address %d is out of range\n", offset); + return -E1000_ERR_PARAM; + } + + /* Set up Op-code, Phy Address, and register offset in the MDI + * Control register. The MAC will take care of interfacing with the + * PHY to retrieve the desired data. + */ + mdic = ((offset << E1000_MDIC_REG_SHIFT) | + (phy->addr << E1000_MDIC_PHY_SHIFT) | + (E1000_MDIC_OP_READ)); + + E1000_WRITE_REG(hw, E1000_MDIC, mdic); + + /* Poll the ready bit to see if the MDI read completed + * Increasing the time out as testing showed failures with + * the lower time out + */ + for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) { + usec_delay_irq(50); + mdic = E1000_READ_REG(hw, E1000_MDIC); + if (mdic & E1000_MDIC_READY) + break; + } + if (!(mdic & E1000_MDIC_READY)) { + DEBUGOUT("MDI Read did not complete\n"); + return -E1000_ERR_PHY; + } + if (mdic & E1000_MDIC_ERROR) { + DEBUGOUT("MDI Error\n"); + return -E1000_ERR_PHY; + } + if (((mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT) != offset) { + DEBUGOUT2("MDI Read offset error - requested %d, returned %d\n", + offset, + (mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT); + return -E1000_ERR_PHY; + } + *data = (u16) mdic; + + return E1000_SUCCESS; +} + +/** + * e1000_write_phy_reg_mdic - Write MDI control register + * @hw: pointer to the HW structure + * @offset: register offset to write to + * @data: data to write to register at offset + * + * Writes data to MDI control register in the PHY at offset. + **/ +s32 e1000_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data) +{ + struct e1000_phy_info *phy = &hw->phy; + u32 i, mdic = 0; + + DEBUGFUNC("e1000_write_phy_reg_mdic"); + + if (offset > MAX_PHY_REG_ADDRESS) { + DEBUGOUT1("PHY Address %d is out of range\n", offset); + return -E1000_ERR_PARAM; + } + + /* Set up Op-code, Phy Address, and register offset in the MDI + * Control register. The MAC will take care of interfacing with the + * PHY to retrieve the desired data. + */ + mdic = (((u32)data) | + (offset << E1000_MDIC_REG_SHIFT) | + (phy->addr << E1000_MDIC_PHY_SHIFT) | + (E1000_MDIC_OP_WRITE)); + + E1000_WRITE_REG(hw, E1000_MDIC, mdic); + + /* Poll the ready bit to see if the MDI read completed + * Increasing the time out as testing showed failures with + * the lower time out + */ + for (i = 0; i < (E1000_GEN_POLL_TIMEOUT * 3); i++) { + usec_delay_irq(50); + mdic = E1000_READ_REG(hw, E1000_MDIC); + if (mdic & E1000_MDIC_READY) + break; + } + if (!(mdic & E1000_MDIC_READY)) { + DEBUGOUT("MDI Write did not complete\n"); + return -E1000_ERR_PHY; + } + if (mdic & E1000_MDIC_ERROR) { + DEBUGOUT("MDI Error\n"); + return -E1000_ERR_PHY; + } + if (((mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT) != offset) { + DEBUGOUT2("MDI Write offset error - requested %d, returned %d\n", + offset, + (mdic & E1000_MDIC_REG_MASK) >> E1000_MDIC_REG_SHIFT); + return -E1000_ERR_PHY; + } + + return E1000_SUCCESS; +} + +/** + * e1000_read_phy_reg_i2c - Read PHY register using i2c + * @hw: pointer to the HW structure + * @offset: register offset to be read + * @data: pointer to the read data + * + * Reads the PHY register at offset using the i2c interface and stores the + * retrieved information in data. + **/ +s32 e1000_read_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 *data) +{ + struct e1000_phy_info *phy = &hw->phy; + u32 i, i2ccmd = 0; + + DEBUGFUNC("e1000_read_phy_reg_i2c"); + + /* Set up Op-code, Phy Address, and register address in the I2CCMD + * register. The MAC will take care of interfacing with the + * PHY to retrieve the desired data. + */ + i2ccmd = ((offset << E1000_I2CCMD_REG_ADDR_SHIFT) | + (phy->addr << E1000_I2CCMD_PHY_ADDR_SHIFT) | + (E1000_I2CCMD_OPCODE_READ)); + + E1000_WRITE_REG(hw, E1000_I2CCMD, i2ccmd); + + /* Poll the ready bit to see if the I2C read completed */ + for (i = 0; i < E1000_I2CCMD_PHY_TIMEOUT; i++) { + usec_delay(50); + i2ccmd = E1000_READ_REG(hw, E1000_I2CCMD); + if (i2ccmd & E1000_I2CCMD_READY) + break; + } + if (!(i2ccmd & E1000_I2CCMD_READY)) { + DEBUGOUT("I2CCMD Read did not complete\n"); + return -E1000_ERR_PHY; + } + if (i2ccmd & E1000_I2CCMD_ERROR) { + DEBUGOUT("I2CCMD Error bit set\n"); + return -E1000_ERR_PHY; + } + + /* Need to byte-swap the 16-bit value. */ + *data = ((i2ccmd >> 8) & 0x00FF) | ((i2ccmd << 8) & 0xFF00); + + return E1000_SUCCESS; +} + +/** + * e1000_write_phy_reg_i2c - Write PHY register using i2c + * @hw: pointer to the HW structure + * @offset: register offset to write to + * @data: data to write at register offset + * + * Writes the data to PHY register at the offset using the i2c interface. + **/ +s32 e1000_write_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 data) +{ + struct e1000_phy_info *phy = &hw->phy; + u32 i, i2ccmd = 0; + u16 phy_data_swapped; + + DEBUGFUNC("e1000_write_phy_reg_i2c"); + + /* Prevent overwritting SFP I2C EEPROM which is at A0 address.*/ + if ((hw->phy.addr == 0) || (hw->phy.addr > 7)) { + DEBUGOUT1("PHY I2C Address %d is out of range.\n", + hw->phy.addr); + return -E1000_ERR_CONFIG; + } + + /* Swap the data bytes for the I2C interface */ + phy_data_swapped = ((data >> 8) & 0x00FF) | ((data << 8) & 0xFF00); + + /* Set up Op-code, Phy Address, and register address in the I2CCMD + * register. The MAC will take care of interfacing with the + * PHY to retrieve the desired data. + */ + i2ccmd = ((offset << E1000_I2CCMD_REG_ADDR_SHIFT) | + (phy->addr << E1000_I2CCMD_PHY_ADDR_SHIFT) | + E1000_I2CCMD_OPCODE_WRITE | + phy_data_swapped); + + E1000_WRITE_REG(hw, E1000_I2CCMD, i2ccmd); + + /* Poll the ready bit to see if the I2C read completed */ + for (i = 0; i < E1000_I2CCMD_PHY_TIMEOUT; i++) { + usec_delay(50); + i2ccmd = E1000_READ_REG(hw, E1000_I2CCMD); + if (i2ccmd & E1000_I2CCMD_READY) + break; + } + if (!(i2ccmd & E1000_I2CCMD_READY)) { + DEBUGOUT("I2CCMD Write did not complete\n"); + return -E1000_ERR_PHY; + } + if (i2ccmd & E1000_I2CCMD_ERROR) { + DEBUGOUT("I2CCMD Error bit set\n"); + return -E1000_ERR_PHY; + } + + return E1000_SUCCESS; +} + +/** + * e1000_read_sfp_data_byte - Reads SFP module data. + * @hw: pointer to the HW structure + * @offset: byte location offset to be read + * @data: read data buffer pointer + * + * Reads one byte from SFP module data stored + * in SFP resided EEPROM memory or SFP diagnostic area. + * Function should be called with + * E1000_I2CCMD_SFP_DATA_ADDR() for SFP module database access + * E1000_I2CCMD_SFP_DIAG_ADDR() for SFP diagnostics parameters + * access + **/ +s32 e1000_read_sfp_data_byte(struct e1000_hw *hw, u16 offset, u8 *data) +{ + u32 i = 0; + u32 i2ccmd = 0; + u32 data_local = 0; + + DEBUGFUNC("e1000_read_sfp_data_byte"); + + if (offset > E1000_I2CCMD_SFP_DIAG_ADDR(255)) { + DEBUGOUT("I2CCMD command address exceeds upper limit\n"); + return -E1000_ERR_PHY; + } + + /* Set up Op-code, EEPROM Address,in the I2CCMD + * register. The MAC will take care of interfacing with the + * EEPROM to retrieve the desired data. + */ + i2ccmd = ((offset << E1000_I2CCMD_REG_ADDR_SHIFT) | + E1000_I2CCMD_OPCODE_READ); + + E1000_WRITE_REG(hw, E1000_I2CCMD, i2ccmd); + + /* Poll the ready bit to see if the I2C read completed */ + for (i = 0; i < E1000_I2CCMD_PHY_TIMEOUT; i++) { + usec_delay(50); + data_local = E1000_READ_REG(hw, E1000_I2CCMD); + if (data_local & E1000_I2CCMD_READY) + break; + } + if (!(data_local & E1000_I2CCMD_READY)) { + DEBUGOUT("I2CCMD Read did not complete\n"); + return -E1000_ERR_PHY; + } + if (data_local & E1000_I2CCMD_ERROR) { + DEBUGOUT("I2CCMD Error bit set\n"); + return -E1000_ERR_PHY; + } + *data = (u8) data_local & 0xFF; + + return E1000_SUCCESS; +} + +/** + * e1000_write_sfp_data_byte - Writes SFP module data. + * @hw: pointer to the HW structure + * @offset: byte location offset to write to + * @data: data to write + * + * Writes one byte to SFP module data stored + * in SFP resided EEPROM memory or SFP diagnostic area. + * Function should be called with + * E1000_I2CCMD_SFP_DATA_ADDR() for SFP module database access + * E1000_I2CCMD_SFP_DIAG_ADDR() for SFP diagnostics parameters + * access + **/ +s32 e1000_write_sfp_data_byte(struct e1000_hw *hw, u16 offset, u8 data) +{ + u32 i = 0; + u32 i2ccmd = 0; + u32 data_local = 0; + + DEBUGFUNC("e1000_write_sfp_data_byte"); + + if (offset > E1000_I2CCMD_SFP_DIAG_ADDR(255)) { + DEBUGOUT("I2CCMD command address exceeds upper limit\n"); + return -E1000_ERR_PHY; + } + /* The programming interface is 16 bits wide + * so we need to read the whole word first + * then update appropriate byte lane and write + * the updated word back. + */ + /* Set up Op-code, EEPROM Address,in the I2CCMD + * register. The MAC will take care of interfacing + * with an EEPROM to write the data given. + */ + i2ccmd = ((offset << E1000_I2CCMD_REG_ADDR_SHIFT) | + E1000_I2CCMD_OPCODE_READ); + /* Set a command to read single word */ + E1000_WRITE_REG(hw, E1000_I2CCMD, i2ccmd); + for (i = 0; i < E1000_I2CCMD_PHY_TIMEOUT; i++) { + usec_delay(50); + /* Poll the ready bit to see if lastly + * launched I2C operation completed + */ + i2ccmd = E1000_READ_REG(hw, E1000_I2CCMD); + if (i2ccmd & E1000_I2CCMD_READY) { + /* Check if this is READ or WRITE phase */ + if ((i2ccmd & E1000_I2CCMD_OPCODE_READ) == + E1000_I2CCMD_OPCODE_READ) { + /* Write the selected byte + * lane and update whole word + */ + data_local = i2ccmd & 0xFF00; + data_local |= data; + i2ccmd = ((offset << + E1000_I2CCMD_REG_ADDR_SHIFT) | + E1000_I2CCMD_OPCODE_WRITE | data_local); + E1000_WRITE_REG(hw, E1000_I2CCMD, i2ccmd); + } else { + break; + } + } + } + if (!(i2ccmd & E1000_I2CCMD_READY)) { + DEBUGOUT("I2CCMD Write did not complete\n"); + return -E1000_ERR_PHY; + } + if (i2ccmd & E1000_I2CCMD_ERROR) { + DEBUGOUT("I2CCMD Error bit set\n"); + return -E1000_ERR_PHY; + } + return E1000_SUCCESS; +} + +/** + * e1000_read_phy_reg_m88 - Read m88 PHY register + * @hw: pointer to the HW structure + * @offset: register offset to be read + * @data: pointer to the read data + * + * Acquires semaphore, if necessary, then reads the PHY register at offset + * and storing the retrieved information in data. Release any acquired + * semaphores before exiting. + **/ +s32 e1000_read_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 *data) +{ + s32 ret_val; + + DEBUGFUNC("e1000_read_phy_reg_m88"); + + if (!hw->phy.ops.acquire) + return E1000_SUCCESS; + + ret_val = hw->phy.ops.acquire(hw); + if (ret_val) + return ret_val; + + ret_val = e1000_read_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset, + data); + + hw->phy.ops.release(hw); + + return ret_val; +} + +/** + * e1000_write_phy_reg_m88 - Write m88 PHY register + * @hw: pointer to the HW structure + * @offset: register offset to write to + * @data: data to write at register offset + * + * Acquires semaphore, if necessary, then writes the data to PHY register + * at the offset. Release any acquired semaphores before exiting. + **/ +s32 e1000_write_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 data) +{ + s32 ret_val; + + DEBUGFUNC("e1000_write_phy_reg_m88"); + + if (!hw->phy.ops.acquire) + return E1000_SUCCESS; + + ret_val = hw->phy.ops.acquire(hw); + if (ret_val) + return ret_val; + + ret_val = e1000_write_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & offset, + data); + + hw->phy.ops.release(hw); + + return ret_val; +} + +/** + * e1000_set_page_igp - Set page as on IGP-like PHY(s) + * @hw: pointer to the HW structure + * @page: page to set (shifted left when necessary) + * + * Sets PHY page required for PHY register access. Assumes semaphore is + * already acquired. Note, this function sets phy.addr to 1 so the caller + * must set it appropriately (if necessary) after this function returns. + **/ +s32 e1000_set_page_igp(struct e1000_hw *hw, u16 page) +{ + DEBUGFUNC("e1000_set_page_igp"); + + DEBUGOUT1("Setting page 0x%x\n", page); + + hw->phy.addr = 1; + + return e1000_write_phy_reg_mdic(hw, IGP01E1000_PHY_PAGE_SELECT, page); +} + +/** + * __e1000_read_phy_reg_igp - Read igp PHY register + * @hw: pointer to the HW structure + * @offset: register offset to be read + * @data: pointer to the read data + * @locked: semaphore has already been acquired or not + * + * Acquires semaphore, if necessary, then reads the PHY register at offset + * and stores the retrieved information in data. Release any acquired + * semaphores before exiting. + **/ +static s32 __e1000_read_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 *data, + bool locked) +{ + s32 ret_val = E1000_SUCCESS; + + DEBUGFUNC("__e1000_read_phy_reg_igp"); + + if (!locked) { + if (!hw->phy.ops.acquire) + return E1000_SUCCESS; + + ret_val = hw->phy.ops.acquire(hw); + if (ret_val) + return ret_val; + } + + if (offset > MAX_PHY_MULTI_PAGE_REG) + ret_val = e1000_write_phy_reg_mdic(hw, + IGP01E1000_PHY_PAGE_SELECT, + (u16)offset); + if (!ret_val) + ret_val = e1000_read_phy_reg_mdic(hw, + MAX_PHY_REG_ADDRESS & offset, + data); + if (!locked) + hw->phy.ops.release(hw); + + return ret_val; +} + +/** + * e1000_read_phy_reg_igp - Read igp PHY register + * @hw: pointer to the HW structure + * @offset: register offset to be read + * @data: pointer to the read data + * + * Acquires semaphore then reads the PHY register at offset and stores the + * retrieved information in data. + * Release the acquired semaphore before exiting. + **/ +s32 e1000_read_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 *data) +{ + return __e1000_read_phy_reg_igp(hw, offset, data, false); +} + +/** + * e1000_read_phy_reg_igp_locked - Read igp PHY register + * @hw: pointer to the HW structure + * @offset: register offset to be read + * @data: pointer to the read data + * + * Reads the PHY register at offset and stores the retrieved information + * in data. Assumes semaphore already acquired. + **/ +s32 e1000_read_phy_reg_igp_locked(struct e1000_hw *hw, u32 offset, u16 *data) +{ + return __e1000_read_phy_reg_igp(hw, offset, data, true); +} + +/** + * e1000_write_phy_reg_igp - Write igp PHY register + * @hw: pointer to the HW structure + * @offset: register offset to write to + * @data: data to write at register offset + * @locked: semaphore has already been acquired or not + * + * Acquires semaphore, if necessary, then writes the data to PHY register + * at the offset. Release any acquired semaphores before exiting. + **/ +static s32 __e1000_write_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 data, + bool locked) +{ + s32 ret_val = E1000_SUCCESS; + + DEBUGFUNC("e1000_write_phy_reg_igp"); + + if (!locked) { + if (!hw->phy.ops.acquire) + return E1000_SUCCESS; + + ret_val = hw->phy.ops.acquire(hw); + if (ret_val) + return ret_val; + } + + if (offset > MAX_PHY_MULTI_PAGE_REG) + ret_val = e1000_write_phy_reg_mdic(hw, + IGP01E1000_PHY_PAGE_SELECT, + (u16)offset); + if (!ret_val) + ret_val = e1000_write_phy_reg_mdic(hw, MAX_PHY_REG_ADDRESS & + offset, + data); + if (!locked) + hw->phy.ops.release(hw); + + return ret_val; +} + +/** + * e1000_write_phy_reg_igp - Write igp PHY register + * @hw: pointer to the HW structure + * @offset: register offset to write to + * @data: data to write at register offset + * + * Acquires semaphore then writes the data to PHY register + * at the offset. Release any acquired semaphores before exiting. + **/ +s32 e1000_write_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 data) +{ + return __e1000_write_phy_reg_igp(hw, offset, data, false); +} + +/** + * e1000_write_phy_reg_igp_locked - Write igp PHY register + * @hw: pointer to the HW structure + * @offset: register offset to write to + * @data: data to write at register offset + * + * Writes the data to PHY register at the offset. + * Assumes semaphore already acquired. + **/ +s32 e1000_write_phy_reg_igp_locked(struct e1000_hw *hw, u32 offset, u16 data) +{ + return __e1000_write_phy_reg_igp(hw, offset, data, true); +} + +/** + * __e1000_read_kmrn_reg - Read kumeran register + * @hw: pointer to the HW structure + * @offset: register offset to be read + * @data: pointer to the read data + * @locked: semaphore has already been acquired or not + * + * Acquires semaphore, if necessary. Then reads the PHY register at offset + * using the kumeran interface. The information retrieved is stored in data. + * Release any acquired semaphores before exiting. + **/ +static s32 __e1000_read_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 *data, + bool locked) +{ + u32 kmrnctrlsta; + + DEBUGFUNC("__e1000_read_kmrn_reg"); + + if (!locked) { + s32 ret_val = E1000_SUCCESS; + + if (!hw->phy.ops.acquire) + return E1000_SUCCESS; + + ret_val = hw->phy.ops.acquire(hw); + if (ret_val) + return ret_val; + } + + kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) & + E1000_KMRNCTRLSTA_OFFSET) | E1000_KMRNCTRLSTA_REN; + E1000_WRITE_REG(hw, E1000_KMRNCTRLSTA, kmrnctrlsta); + E1000_WRITE_FLUSH(hw); + + usec_delay(2); + + kmrnctrlsta = E1000_READ_REG(hw, E1000_KMRNCTRLSTA); + *data = (u16)kmrnctrlsta; + + if (!locked) + hw->phy.ops.release(hw); + + return E1000_SUCCESS; +} + +/** + * e1000_read_kmrn_reg_generic - Read kumeran register + * @hw: pointer to the HW structure + * @offset: register offset to be read + * @data: pointer to the read data + * + * Acquires semaphore then reads the PHY register at offset using the + * kumeran interface. The information retrieved is stored in data. + * Release the acquired semaphore before exiting. + **/ +s32 e1000_read_kmrn_reg_generic(struct e1000_hw *hw, u32 offset, u16 *data) +{ + return __e1000_read_kmrn_reg(hw, offset, data, false); +} + +/** + * e1000_read_kmrn_reg_locked - Read kumeran register + * @hw: pointer to the HW structure + * @offset: register offset to be read + * @data: pointer to the read data + * + * Reads the PHY register at offset using the kumeran interface. The + * information retrieved is stored in data. + * Assumes semaphore already acquired. + **/ +s32 e1000_read_kmrn_reg_locked(struct e1000_hw *hw, u32 offset, u16 *data) +{ + return __e1000_read_kmrn_reg(hw, offset, data, true); +} + +/** + * __e1000_write_kmrn_reg - Write kumeran register + * @hw: pointer to the HW structure + * @offset: register offset to write to + * @data: data to write at register offset + * @locked: semaphore has already been acquired or not + * + * Acquires semaphore, if necessary. Then write the data to PHY register + * at the offset using the kumeran interface. Release any acquired semaphores + * before exiting. + **/ +static s32 __e1000_write_kmrn_reg(struct e1000_hw *hw, u32 offset, u16 data, + bool locked) +{ + u32 kmrnctrlsta; + + DEBUGFUNC("e1000_write_kmrn_reg_generic"); + + if (!locked) { + s32 ret_val = E1000_SUCCESS; + + if (!hw->phy.ops.acquire) + return E1000_SUCCESS; + + ret_val = hw->phy.ops.acquire(hw); + if (ret_val) + return ret_val; + } + + kmrnctrlsta = ((offset << E1000_KMRNCTRLSTA_OFFSET_SHIFT) & + E1000_KMRNCTRLSTA_OFFSET) | data; + E1000_WRITE_REG(hw, E1000_KMRNCTRLSTA, kmrnctrlsta); + E1000_WRITE_FLUSH(hw); + + usec_delay(2); + + if (!locked) + hw->phy.ops.release(hw); + + return E1000_SUCCESS; +} + +/** + * e1000_write_kmrn_reg_generic - Write kumeran register + * @hw: pointer to the HW structure + * @offset: register offset to write to + * @data: data to write at register offset + * + * Acquires semaphore then writes the data to the PHY register at the offset + * using the kumeran interface. Release the acquired semaphore before exiting. + **/ +s32 e1000_write_kmrn_reg_generic(struct e1000_hw *hw, u32 offset, u16 data) +{ + return __e1000_write_kmrn_reg(hw, offset, data, false); +} + +/** + * e1000_write_kmrn_reg_locked - Write kumeran register + * @hw: pointer to the HW structure + * @offset: register offset to write to + * @data: data to write at register offset + * + * Write the data to PHY register at the offset using the kumeran interface. + * Assumes semaphore already acquired. + **/ +s32 e1000_write_kmrn_reg_locked(struct e1000_hw *hw, u32 offset, u16 data) +{ + return __e1000_write_kmrn_reg(hw, offset, data, true); +} + +/** + * e1000_set_master_slave_mode - Setup PHY for Master/slave mode + * @hw: pointer to the HW structure + * + * Sets up Master/slave mode + **/ +static s32 e1000_set_master_slave_mode(struct e1000_hw *hw) +{ + s32 ret_val; + u16 phy_data; + + /* Resolve Master/Slave mode */ + ret_val = hw->phy.ops.read_reg(hw, PHY_1000T_CTRL, &phy_data); + if (ret_val) + return ret_val; + + /* load defaults for future use */ + hw->phy.original_ms_type = (phy_data & CR_1000T_MS_ENABLE) ? + ((phy_data & CR_1000T_MS_VALUE) ? + e1000_ms_force_master : + e1000_ms_force_slave) : e1000_ms_auto; + + switch (hw->phy.ms_type) { + case e1000_ms_force_master: + phy_data |= (CR_1000T_MS_ENABLE | CR_1000T_MS_VALUE); + break; + case e1000_ms_force_slave: + phy_data |= CR_1000T_MS_ENABLE; + phy_data &= ~(CR_1000T_MS_VALUE); + break; + case e1000_ms_auto: + phy_data &= ~CR_1000T_MS_ENABLE; + /* fall-through */ + default: + break; + } + + return hw->phy.ops.write_reg(hw, PHY_1000T_CTRL, phy_data); +} + +/** + * e1000_copper_link_setup_82577 - Setup 82577 PHY for copper link + * @hw: pointer to the HW structure + * + * Sets up Carrier-sense on Transmit and downshift values. + **/ +s32 e1000_copper_link_setup_82577(struct e1000_hw *hw) +{ + s32 ret_val; + u16 phy_data; + + DEBUGFUNC("e1000_copper_link_setup_82577"); + + if (hw->phy.reset_disable) + return E1000_SUCCESS; + + if (hw->phy.type == e1000_phy_82580) { + ret_val = hw->phy.ops.reset(hw); + if (ret_val) { + DEBUGOUT("Error resetting the PHY.\n"); + return ret_val; + } + } + + /* Enable CRS on Tx. This must be set for half-duplex operation. */ + ret_val = hw->phy.ops.read_reg(hw, I82577_CFG_REG, &phy_data); + if (ret_val) + return ret_val; + + phy_data |= I82577_CFG_ASSERT_CRS_ON_TX; + + /* Enable downshift */ + phy_data |= I82577_CFG_ENABLE_DOWNSHIFT; + + ret_val = hw->phy.ops.write_reg(hw, I82577_CFG_REG, phy_data); + if (ret_val) + return ret_val; + + /* Set MDI/MDIX mode */ + ret_val = hw->phy.ops.read_reg(hw, I82577_PHY_CTRL_2, &phy_data); + if (ret_val) + return ret_val; + phy_data &= ~I82577_PHY_CTRL2_MDIX_CFG_MASK; + /* Options: + * 0 - Auto (default) + * 1 - MDI mode + * 2 - MDI-X mode + */ + switch (hw->phy.mdix) { + case 1: + break; + case 2: + phy_data |= I82577_PHY_CTRL2_MANUAL_MDIX; + break; + case 0: + default: + phy_data |= I82577_PHY_CTRL2_AUTO_MDI_MDIX; + break; + } + ret_val = hw->phy.ops.write_reg(hw, I82577_PHY_CTRL_2, phy_data); + if (ret_val) + return ret_val; + + return e1000_set_master_slave_mode(hw); +} + +/** + * e1000_copper_link_setup_m88 - Setup m88 PHY's for copper link + * @hw: pointer to the HW structure + * + * Sets up MDI/MDI-X and polarity for m88 PHY's. If necessary, transmit clock + * and downshift values are set also. + **/ +s32 e1000_copper_link_setup_m88(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 phy_data; + + DEBUGFUNC("e1000_copper_link_setup_m88"); + + if (phy->reset_disable) + return E1000_SUCCESS; + + /* Enable CRS on Tx. This must be set for half-duplex operation. */ + ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); + if (ret_val) + return ret_val; + + phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX; + + /* Options: + * MDI/MDI-X = 0 (default) + * 0 - Auto for all speeds + * 1 - MDI mode + * 2 - MDI-X mode + * 3 - Auto for 1000Base-T only (MDI-X for 10/100Base-T modes) + */ + phy_data &= ~M88E1000_PSCR_AUTO_X_MODE; + + switch (phy->mdix) { + case 1: + phy_data |= M88E1000_PSCR_MDI_MANUAL_MODE; + break; + case 2: + phy_data |= M88E1000_PSCR_MDIX_MANUAL_MODE; + break; + case 3: + phy_data |= M88E1000_PSCR_AUTO_X_1000T; + break; + case 0: + default: + phy_data |= M88E1000_PSCR_AUTO_X_MODE; + break; + } + + /* Options: + * disable_polarity_correction = 0 (default) + * Automatic Correction for Reversed Cable Polarity + * 0 - Disabled + * 1 - Enabled + */ + phy_data &= ~M88E1000_PSCR_POLARITY_REVERSAL; + if (phy->disable_polarity_correction) + phy_data |= M88E1000_PSCR_POLARITY_REVERSAL; + + ret_val = phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data); + if (ret_val) + return ret_val; + + if (phy->revision < E1000_REVISION_4) { + /* Force TX_CLK in the Extended PHY Specific Control Register + * to 25MHz clock. + */ + ret_val = phy->ops.read_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, + &phy_data); + if (ret_val) + return ret_val; + + phy_data |= M88E1000_EPSCR_TX_CLK_25; + + if ((phy->revision == E1000_REVISION_2) && + (phy->id == M88E1111_I_PHY_ID)) { + /* 82573L PHY - set the downshift counter to 5x. */ + phy_data &= ~M88EC018_EPSCR_DOWNSHIFT_COUNTER_MASK; + phy_data |= M88EC018_EPSCR_DOWNSHIFT_COUNTER_5X; + } else { + /* Configure Master and Slave downshift values */ + phy_data &= ~(M88E1000_EPSCR_MASTER_DOWNSHIFT_MASK | + M88E1000_EPSCR_SLAVE_DOWNSHIFT_MASK); + phy_data |= (M88E1000_EPSCR_MASTER_DOWNSHIFT_1X | + M88E1000_EPSCR_SLAVE_DOWNSHIFT_1X); + } + ret_val = phy->ops.write_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, + phy_data); + if (ret_val) + return ret_val; + } + + /* Commit the changes. */ + ret_val = phy->ops.commit(hw); + if (ret_val) { + DEBUGOUT("Error committing the PHY changes\n"); + return ret_val; + } + + return E1000_SUCCESS; +} + +/** + * e1000_copper_link_setup_m88_gen2 - Setup m88 PHY's for copper link + * @hw: pointer to the HW structure + * + * Sets up MDI/MDI-X and polarity for i347-AT4, m88e1322 and m88e1112 PHY's. + * Also enables and sets the downshift parameters. + **/ +s32 e1000_copper_link_setup_m88_gen2(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 phy_data; + + DEBUGFUNC("e1000_copper_link_setup_m88_gen2"); + + if (phy->reset_disable) + return E1000_SUCCESS; + + /* Enable CRS on Tx. This must be set for half-duplex operation. */ + ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); + if (ret_val) + return ret_val; + + /* Options: + * MDI/MDI-X = 0 (default) + * 0 - Auto for all speeds + * 1 - MDI mode + * 2 - MDI-X mode + * 3 - Auto for 1000Base-T only (MDI-X for 10/100Base-T modes) + */ + phy_data &= ~M88E1000_PSCR_AUTO_X_MODE; + + switch (phy->mdix) { + case 1: + phy_data |= M88E1000_PSCR_MDI_MANUAL_MODE; + break; + case 2: + phy_data |= M88E1000_PSCR_MDIX_MANUAL_MODE; + break; + case 3: + /* M88E1112 does not support this mode) */ + if (phy->id != M88E1112_E_PHY_ID) { + phy_data |= M88E1000_PSCR_AUTO_X_1000T; + break; + } + case 0: + default: + phy_data |= M88E1000_PSCR_AUTO_X_MODE; + break; + } + + /* Options: + * disable_polarity_correction = 0 (default) + * Automatic Correction for Reversed Cable Polarity + * 0 - Disabled + * 1 - Enabled + */ + phy_data &= ~M88E1000_PSCR_POLARITY_REVERSAL; + if (phy->disable_polarity_correction) + phy_data |= M88E1000_PSCR_POLARITY_REVERSAL; + + /* Enable downshift and setting it to X6 */ + if (phy->id == M88E1543_E_PHY_ID) { + phy_data &= ~I347AT4_PSCR_DOWNSHIFT_ENABLE; + ret_val = + phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data); + if (ret_val) + return ret_val; + + ret_val = phy->ops.commit(hw); + if (ret_val) { + DEBUGOUT("Error committing the PHY changes\n"); + return ret_val; + } + } + + phy_data &= ~I347AT4_PSCR_DOWNSHIFT_MASK; + phy_data |= I347AT4_PSCR_DOWNSHIFT_6X; + phy_data |= I347AT4_PSCR_DOWNSHIFT_ENABLE; + + ret_val = phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data); + if (ret_val) + return ret_val; + + /* Commit the changes. */ + ret_val = phy->ops.commit(hw); + if (ret_val) { + DEBUGOUT("Error committing the PHY changes\n"); + return ret_val; + } + + ret_val = e1000_set_master_slave_mode(hw); + if (ret_val) + return ret_val; + + return E1000_SUCCESS; +} + +/** + * e1000_copper_link_setup_igp - Setup igp PHY's for copper link + * @hw: pointer to the HW structure + * + * Sets up LPLU, MDI/MDI-X, polarity, Smartspeed and Master/Slave config for + * igp PHY's. + **/ +s32 e1000_copper_link_setup_igp(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 data; + + DEBUGFUNC("e1000_copper_link_setup_igp"); + + if (phy->reset_disable) + return E1000_SUCCESS; + + ret_val = hw->phy.ops.reset(hw); + if (ret_val) { + DEBUGOUT("Error resetting the PHY.\n"); + return ret_val; + } + + /* Wait 100ms for MAC to configure PHY from NVM settings, to avoid + * timeout issues when LFS is enabled. + */ + msec_delay(100); + + /* disable lplu d0 during driver init */ + if (hw->phy.ops.set_d0_lplu_state) { + ret_val = hw->phy.ops.set_d0_lplu_state(hw, false); + if (ret_val) { + DEBUGOUT("Error Disabling LPLU D0\n"); + return ret_val; + } + } + /* Configure mdi-mdix settings */ + ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CTRL, &data); + if (ret_val) + return ret_val; + + data &= ~IGP01E1000_PSCR_AUTO_MDIX; + + switch (phy->mdix) { + case 1: + data &= ~IGP01E1000_PSCR_FORCE_MDI_MDIX; + break; + case 2: + data |= IGP01E1000_PSCR_FORCE_MDI_MDIX; + break; + case 0: + default: + data |= IGP01E1000_PSCR_AUTO_MDIX; + break; + } + ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CTRL, data); + if (ret_val) + return ret_val; + + /* set auto-master slave resolution settings */ + if (hw->mac.autoneg) { + /* when autonegotiation advertisement is only 1000Mbps then we + * should disable SmartSpeed and enable Auto MasterSlave + * resolution as hardware default. + */ + if (phy->autoneg_advertised == ADVERTISE_1000_FULL) { + /* Disable SmartSpeed */ + ret_val = phy->ops.read_reg(hw, + IGP01E1000_PHY_PORT_CONFIG, + &data); + if (ret_val) + return ret_val; + + data &= ~IGP01E1000_PSCFR_SMART_SPEED; + ret_val = phy->ops.write_reg(hw, + IGP01E1000_PHY_PORT_CONFIG, + data); + if (ret_val) + return ret_val; + + /* Set auto Master/Slave resolution process */ + ret_val = phy->ops.read_reg(hw, PHY_1000T_CTRL, &data); + if (ret_val) + return ret_val; + + data &= ~CR_1000T_MS_ENABLE; + ret_val = phy->ops.write_reg(hw, PHY_1000T_CTRL, data); + if (ret_val) + return ret_val; + } + + ret_val = e1000_set_master_slave_mode(hw); + } + + return ret_val; +} + +/** + * e1000_phy_setup_autoneg - Configure PHY for auto-negotiation + * @hw: pointer to the HW structure + * + * Reads the MII auto-neg advertisement register and/or the 1000T control + * register and if the PHY is already setup for auto-negotiation, then + * return successful. Otherwise, setup advertisement and flow control to + * the appropriate values for the wanted auto-negotiation. + **/ +static s32 e1000_phy_setup_autoneg(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 mii_autoneg_adv_reg; + u16 mii_1000t_ctrl_reg = 0; + + DEBUGFUNC("e1000_phy_setup_autoneg"); + + phy->autoneg_advertised &= phy->autoneg_mask; + + /* Read the MII Auto-Neg Advertisement Register (Address 4). */ + ret_val = phy->ops.read_reg(hw, PHY_AUTONEG_ADV, &mii_autoneg_adv_reg); + if (ret_val) + return ret_val; + + if (phy->autoneg_mask & ADVERTISE_1000_FULL) { + /* Read the MII 1000Base-T Control Register (Address 9). */ + ret_val = phy->ops.read_reg(hw, PHY_1000T_CTRL, + &mii_1000t_ctrl_reg); + if (ret_val) + return ret_val; + } + + /* Need to parse both autoneg_advertised and fc and set up + * the appropriate PHY registers. First we will parse for + * autoneg_advertised software override. Since we can advertise + * a plethora of combinations, we need to check each bit + * individually. + */ + + /* First we clear all the 10/100 mb speed bits in the Auto-Neg + * Advertisement Register (Address 4) and the 1000 mb speed bits in + * the 1000Base-T Control Register (Address 9). + */ + mii_autoneg_adv_reg &= ~(NWAY_AR_100TX_FD_CAPS | + NWAY_AR_100TX_HD_CAPS | + NWAY_AR_10T_FD_CAPS | + NWAY_AR_10T_HD_CAPS); + mii_1000t_ctrl_reg &= ~(CR_1000T_HD_CAPS | CR_1000T_FD_CAPS); + + DEBUGOUT1("autoneg_advertised %x\n", phy->autoneg_advertised); + + /* Do we want to advertise 10 Mb Half Duplex? */ + if (phy->autoneg_advertised & ADVERTISE_10_HALF) { + DEBUGOUT("Advertise 10mb Half duplex\n"); + mii_autoneg_adv_reg |= NWAY_AR_10T_HD_CAPS; + } + + /* Do we want to advertise 10 Mb Full Duplex? */ + if (phy->autoneg_advertised & ADVERTISE_10_FULL) { + DEBUGOUT("Advertise 10mb Full duplex\n"); + mii_autoneg_adv_reg |= NWAY_AR_10T_FD_CAPS; + } + + /* Do we want to advertise 100 Mb Half Duplex? */ + if (phy->autoneg_advertised & ADVERTISE_100_HALF) { + DEBUGOUT("Advertise 100mb Half duplex\n"); + mii_autoneg_adv_reg |= NWAY_AR_100TX_HD_CAPS; + } + + /* Do we want to advertise 100 Mb Full Duplex? */ + if (phy->autoneg_advertised & ADVERTISE_100_FULL) { + DEBUGOUT("Advertise 100mb Full duplex\n"); + mii_autoneg_adv_reg |= NWAY_AR_100TX_FD_CAPS; + } + + /* We do not allow the Phy to advertise 1000 Mb Half Duplex */ + if (phy->autoneg_advertised & ADVERTISE_1000_HALF) + DEBUGOUT("Advertise 1000mb Half duplex request denied!\n"); + + /* Do we want to advertise 1000 Mb Full Duplex? */ + if (phy->autoneg_advertised & ADVERTISE_1000_FULL) { + DEBUGOUT("Advertise 1000mb Full duplex\n"); + mii_1000t_ctrl_reg |= CR_1000T_FD_CAPS; + } + + /* Check for a software override of the flow control settings, and + * setup the PHY advertisement registers accordingly. If + * auto-negotiation is enabled, then software will have to set the + * "PAUSE" bits to the correct value in the Auto-Negotiation + * Advertisement Register (PHY_AUTONEG_ADV) and re-start auto- + * negotiation. + * + * The possible values of the "fc" parameter are: + * 0: Flow control is completely disabled + * 1: Rx flow control is enabled (we can receive pause frames + * but not send pause frames). + * 2: Tx flow control is enabled (we can send pause frames + * but we do not support receiving pause frames). + * 3: Both Rx and Tx flow control (symmetric) are enabled. + * other: No software override. The flow control configuration + * in the EEPROM is used. + */ + switch (hw->fc.current_mode) { + case e1000_fc_none: + /* Flow control (Rx & Tx) is completely disabled by a + * software over-ride. + */ + mii_autoneg_adv_reg &= ~(NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); + break; + case e1000_fc_rx_pause: + /* Rx Flow control is enabled, and Tx Flow control is + * disabled, by a software over-ride. + * + * Since there really isn't a way to advertise that we are + * capable of Rx Pause ONLY, we will advertise that we + * support both symmetric and asymmetric Rx PAUSE. Later + * (in e1000_config_fc_after_link_up) we will disable the + * hw's ability to send PAUSE frames. + */ + mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); + break; + case e1000_fc_tx_pause: + /* Tx Flow control is enabled, and Rx Flow control is + * disabled, by a software over-ride. + */ + mii_autoneg_adv_reg |= NWAY_AR_ASM_DIR; + mii_autoneg_adv_reg &= ~NWAY_AR_PAUSE; + break; + case e1000_fc_full: + /* Flow control (both Rx and Tx) is enabled by a software + * over-ride. + */ + mii_autoneg_adv_reg |= (NWAY_AR_ASM_DIR | NWAY_AR_PAUSE); + break; + default: + DEBUGOUT("Flow control param set incorrectly\n"); + return -E1000_ERR_CONFIG; + } + + ret_val = phy->ops.write_reg(hw, PHY_AUTONEG_ADV, mii_autoneg_adv_reg); + if (ret_val) + return ret_val; + + DEBUGOUT1("Auto-Neg Advertising %x\n", mii_autoneg_adv_reg); + + if (phy->autoneg_mask & ADVERTISE_1000_FULL) + ret_val = phy->ops.write_reg(hw, PHY_1000T_CTRL, + mii_1000t_ctrl_reg); + + return ret_val; +} + +/** + * e1000_copper_link_autoneg - Setup/Enable autoneg for copper link + * @hw: pointer to the HW structure + * + * Performs initial bounds checking on autoneg advertisement parameter, then + * configure to advertise the full capability. Setup the PHY to autoneg + * and restart the negotiation process between the link partner. If + * autoneg_wait_to_complete, then wait for autoneg to complete before exiting. + **/ +static s32 e1000_copper_link_autoneg(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 phy_ctrl; + + DEBUGFUNC("e1000_copper_link_autoneg"); + + /* Perform some bounds checking on the autoneg advertisement + * parameter. + */ + phy->autoneg_advertised &= phy->autoneg_mask; + + /* If autoneg_advertised is zero, we assume it was not defaulted + * by the calling code so we set to advertise full capability. + */ + if (!phy->autoneg_advertised) + phy->autoneg_advertised = phy->autoneg_mask; + + DEBUGOUT("Reconfiguring auto-neg advertisement params\n"); + ret_val = e1000_phy_setup_autoneg(hw); + if (ret_val) { + DEBUGOUT("Error Setting up Auto-Negotiation\n"); + return ret_val; + } + DEBUGOUT("Restarting Auto-Neg\n"); + + /* Restart auto-negotiation by setting the Auto Neg Enable bit and + * the Auto Neg Restart bit in the PHY control register. + */ + ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_ctrl); + if (ret_val) + return ret_val; + + phy_ctrl |= (MII_CR_AUTO_NEG_EN | MII_CR_RESTART_AUTO_NEG); + ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_ctrl); + if (ret_val) + return ret_val; + + /* Does the user want to wait for Auto-Neg to complete here, or + * check at a later time (for example, callback routine). + */ + if (phy->autoneg_wait_to_complete) { + ret_val = e1000_wait_autoneg(hw); + if (ret_val) { + DEBUGOUT("Error while waiting for autoneg to complete\n"); + return ret_val; + } + } + + hw->mac.get_link_status = true; + + return ret_val; +} + +/** + * e1000_setup_copper_link_generic - Configure copper link settings + * @hw: pointer to the HW structure + * + * Calls the appropriate function to configure the link for auto-neg or forced + * speed and duplex. Then we check for link, once link is established calls + * to configure collision distance and flow control are called. If link is + * not established, we return -E1000_ERR_PHY (-2). + **/ +s32 e1000_setup_copper_link_generic(struct e1000_hw *hw) +{ + s32 ret_val; + bool link; + + DEBUGFUNC("e1000_setup_copper_link_generic"); + + if (hw->mac.autoneg) { + /* Setup autoneg and flow control advertisement and perform + * autonegotiation. + */ + ret_val = e1000_copper_link_autoneg(hw); + if (ret_val) + return ret_val; + } else { + /* PHY will be set to 10H, 10F, 100H or 100F + * depending on user settings. + */ + DEBUGOUT("Forcing Speed and Duplex\n"); + ret_val = hw->phy.ops.force_speed_duplex(hw); + if (ret_val) { + DEBUGOUT("Error Forcing Speed and Duplex\n"); + return ret_val; + } + } + + /* Check link status. Wait up to 100 microseconds for link to become + * valid. + */ + ret_val = e1000_phy_has_link_generic(hw, COPPER_LINK_UP_LIMIT, 10, + &link); + if (ret_val) + return ret_val; + + if (link) { + DEBUGOUT("Valid link established!!!\n"); + hw->mac.ops.config_collision_dist(hw); + ret_val = e1000_config_fc_after_link_up_generic(hw); + } else { + DEBUGOUT("Unable to establish link!!!\n"); + } + + return ret_val; +} + +/** + * e1000_phy_force_speed_duplex_igp - Force speed/duplex for igp PHY + * @hw: pointer to the HW structure + * + * Calls the PHY setup function to force speed and duplex. Clears the + * auto-crossover to force MDI manually. Waits for link and returns + * successful if link up is successful, else -E1000_ERR_PHY (-2). + **/ +s32 e1000_phy_force_speed_duplex_igp(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 phy_data; + bool link; + + DEBUGFUNC("e1000_phy_force_speed_duplex_igp"); + + ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_data); + if (ret_val) + return ret_val; + + e1000_phy_force_speed_duplex_setup(hw, &phy_data); + + ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_data); + if (ret_val) + return ret_val; + + /* Clear Auto-Crossover to force MDI manually. IGP requires MDI + * forced whenever speed and duplex are forced. + */ + ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CTRL, &phy_data); + if (ret_val) + return ret_val; + + phy_data &= ~IGP01E1000_PSCR_AUTO_MDIX; + phy_data &= ~IGP01E1000_PSCR_FORCE_MDI_MDIX; + + ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CTRL, phy_data); + if (ret_val) + return ret_val; + + DEBUGOUT1("IGP PSCR: %X\n", phy_data); + + usec_delay(1); + + if (phy->autoneg_wait_to_complete) { + DEBUGOUT("Waiting for forced speed/duplex link on IGP phy.\n"); + + ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT, + 100000, &link); + if (ret_val) + return ret_val; + + if (!link) + DEBUGOUT("Link taking longer than expected.\n"); + + /* Try once more */ + ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT, + 100000, &link); + } + + return ret_val; +} + +/** + * e1000_phy_force_speed_duplex_m88 - Force speed/duplex for m88 PHY + * @hw: pointer to the HW structure + * + * Calls the PHY setup function to force speed and duplex. Clears the + * auto-crossover to force MDI manually. Resets the PHY to commit the + * changes. If time expires while waiting for link up, we reset the DSP. + * After reset, TX_CLK and CRS on Tx must be set. Return successful upon + * successful completion, else return corresponding error code. + **/ +s32 e1000_phy_force_speed_duplex_m88(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 phy_data; + bool link; + + DEBUGFUNC("e1000_phy_force_speed_duplex_m88"); + + /* I210 and I211 devices support Auto-Crossover in forced operation. */ + if (phy->type != e1000_phy_i210) { + /* Clear Auto-Crossover to force MDI manually. M88E1000 + * requires MDI forced whenever speed and duplex are forced. + */ + ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, + &phy_data); + if (ret_val) + return ret_val; + + phy_data &= ~M88E1000_PSCR_AUTO_X_MODE; + ret_val = phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, + phy_data); + if (ret_val) + return ret_val; + } + + DEBUGOUT1("M88E1000 PSCR: %X\n", phy_data); + + ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_data); + if (ret_val) + return ret_val; + + e1000_phy_force_speed_duplex_setup(hw, &phy_data); + + ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_data); + if (ret_val) + return ret_val; + + /* Reset the phy to commit changes. */ + ret_val = hw->phy.ops.commit(hw); + if (ret_val) + return ret_val; + + if (phy->autoneg_wait_to_complete) { + DEBUGOUT("Waiting for forced speed/duplex link on M88 phy.\n"); + + ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT, + 100000, &link); + if (ret_val) + return ret_val; + + if (!link) { + bool reset_dsp = true; + + switch (hw->phy.id) { + case I347AT4_E_PHY_ID: + case M88E1340M_E_PHY_ID: + case M88E1112_E_PHY_ID: + case M88E1543_E_PHY_ID: + case I210_I_PHY_ID: + reset_dsp = false; + break; + default: + if (hw->phy.type != e1000_phy_m88) + reset_dsp = false; + break; + } + + if (!reset_dsp) { + DEBUGOUT("Link taking longer than expected.\n"); + } else { + /* We didn't get link. + * Reset the DSP and cross our fingers. + */ + ret_val = phy->ops.write_reg(hw, + M88E1000_PHY_PAGE_SELECT, + 0x001d); + if (ret_val) + return ret_val; + ret_val = e1000_phy_reset_dsp_generic(hw); + if (ret_val) + return ret_val; + } + } + + /* Try once more */ + ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT, + 100000, &link); + if (ret_val) + return ret_val; + } + + if (hw->phy.type != e1000_phy_m88) + return E1000_SUCCESS; + + if (hw->phy.id == I347AT4_E_PHY_ID || + hw->phy.id == M88E1340M_E_PHY_ID || + hw->phy.id == M88E1112_E_PHY_ID) + return E1000_SUCCESS; + if (hw->phy.id == I210_I_PHY_ID) + return E1000_SUCCESS; + if ((hw->phy.id == M88E1543_E_PHY_ID)) + return E1000_SUCCESS; + ret_val = phy->ops.read_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, &phy_data); + if (ret_val) + return ret_val; + + /* Resetting the phy means we need to re-force TX_CLK in the + * Extended PHY Specific Control Register to 25MHz clock from + * the reset value of 2.5MHz. + */ + phy_data |= M88E1000_EPSCR_TX_CLK_25; + ret_val = phy->ops.write_reg(hw, M88E1000_EXT_PHY_SPEC_CTRL, phy_data); + if (ret_val) + return ret_val; + + /* In addition, we must re-enable CRS on Tx for both half and full + * duplex. + */ + ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); + if (ret_val) + return ret_val; + + phy_data |= M88E1000_PSCR_ASSERT_CRS_ON_TX; + ret_val = phy->ops.write_reg(hw, M88E1000_PHY_SPEC_CTRL, phy_data); + + return ret_val; +} + +/** + * e1000_phy_force_speed_duplex_ife - Force PHY speed & duplex + * @hw: pointer to the HW structure + * + * Forces the speed and duplex settings of the PHY. + * This is a function pointer entry point only called by + * PHY setup routines. + **/ +s32 e1000_phy_force_speed_duplex_ife(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 data; + bool link; + + DEBUGFUNC("e1000_phy_force_speed_duplex_ife"); + + ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &data); + if (ret_val) + return ret_val; + + e1000_phy_force_speed_duplex_setup(hw, &data); + + ret_val = phy->ops.write_reg(hw, PHY_CONTROL, data); + if (ret_val) + return ret_val; + + /* Disable MDI-X support for 10/100 */ + ret_val = phy->ops.read_reg(hw, IFE_PHY_MDIX_CONTROL, &data); + if (ret_val) + return ret_val; + + data &= ~IFE_PMC_AUTO_MDIX; + data &= ~IFE_PMC_FORCE_MDIX; + + ret_val = phy->ops.write_reg(hw, IFE_PHY_MDIX_CONTROL, data); + if (ret_val) + return ret_val; + + DEBUGOUT1("IFE PMC: %X\n", data); + + usec_delay(1); + + if (phy->autoneg_wait_to_complete) { + DEBUGOUT("Waiting for forced speed/duplex link on IFE phy.\n"); + + ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT, + 100000, &link); + if (ret_val) + return ret_val; + + if (!link) + DEBUGOUT("Link taking longer than expected.\n"); + + /* Try once more */ + ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT, + 100000, &link); + if (ret_val) + return ret_val; + } + + return E1000_SUCCESS; +} + +/** + * e1000_phy_force_speed_duplex_setup - Configure forced PHY speed/duplex + * @hw: pointer to the HW structure + * @phy_ctrl: pointer to current value of PHY_CONTROL + * + * Forces speed and duplex on the PHY by doing the following: disable flow + * control, force speed/duplex on the MAC, disable auto speed detection, + * disable auto-negotiation, configure duplex, configure speed, configure + * the collision distance, write configuration to CTRL register. The + * caller must write to the PHY_CONTROL register for these settings to + * take affect. + **/ +void e1000_phy_force_speed_duplex_setup(struct e1000_hw *hw, u16 *phy_ctrl) +{ + struct e1000_mac_info *mac = &hw->mac; + u32 ctrl; + + DEBUGFUNC("e1000_phy_force_speed_duplex_setup"); + + /* Turn off flow control when forcing speed/duplex */ + hw->fc.current_mode = e1000_fc_none; + + /* Force speed/duplex on the mac */ + ctrl = E1000_READ_REG(hw, E1000_CTRL); + ctrl |= (E1000_CTRL_FRCSPD | E1000_CTRL_FRCDPX); + ctrl &= ~E1000_CTRL_SPD_SEL; + + /* Disable Auto Speed Detection */ + ctrl &= ~E1000_CTRL_ASDE; + + /* Disable autoneg on the phy */ + *phy_ctrl &= ~MII_CR_AUTO_NEG_EN; + + /* Forcing Full or Half Duplex? */ + if (mac->forced_speed_duplex & E1000_ALL_HALF_DUPLEX) { + ctrl &= ~E1000_CTRL_FD; + *phy_ctrl &= ~MII_CR_FULL_DUPLEX; + DEBUGOUT("Half Duplex\n"); + } else { + ctrl |= E1000_CTRL_FD; + *phy_ctrl |= MII_CR_FULL_DUPLEX; + DEBUGOUT("Full Duplex\n"); + } + + /* Forcing 10mb or 100mb? */ + if (mac->forced_speed_duplex & E1000_ALL_100_SPEED) { + ctrl |= E1000_CTRL_SPD_100; + *phy_ctrl |= MII_CR_SPEED_100; + *phy_ctrl &= ~MII_CR_SPEED_1000; + DEBUGOUT("Forcing 100mb\n"); + } else { + ctrl &= ~(E1000_CTRL_SPD_1000 | E1000_CTRL_SPD_100); + *phy_ctrl &= ~(MII_CR_SPEED_1000 | MII_CR_SPEED_100); + DEBUGOUT("Forcing 10mb\n"); + } + + hw->mac.ops.config_collision_dist(hw); + + E1000_WRITE_REG(hw, E1000_CTRL, ctrl); +} + +/** + * e1000_set_d3_lplu_state_generic - Sets low power link up state for D3 + * @hw: pointer to the HW structure + * @active: boolean used to enable/disable lplu + * + * Success returns 0, Failure returns 1 + * + * The low power link up (lplu) state is set to the power management level D3 + * and SmartSpeed is disabled when active is true, else clear lplu for D3 + * and enable Smartspeed. LPLU and Smartspeed are mutually exclusive. LPLU + * is used during Dx states where the power conservation is most important. + * During driver activity, SmartSpeed should be enabled so performance is + * maintained. + **/ +s32 e1000_set_d3_lplu_state_generic(struct e1000_hw *hw, bool active) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 data; + + DEBUGFUNC("e1000_set_d3_lplu_state_generic"); + + if (!hw->phy.ops.read_reg) + return E1000_SUCCESS; + + ret_val = phy->ops.read_reg(hw, IGP02E1000_PHY_POWER_MGMT, &data); + if (ret_val) + return ret_val; + + if (!active) { + data &= ~IGP02E1000_PM_D3_LPLU; + ret_val = phy->ops.write_reg(hw, IGP02E1000_PHY_POWER_MGMT, + data); + if (ret_val) + return ret_val; + /* LPLU and SmartSpeed are mutually exclusive. LPLU is used + * during Dx states where the power conservation is most + * important. During driver activity we should enable + * SmartSpeed, so performance is maintained. + */ + if (phy->smart_speed == e1000_smart_speed_on) { + ret_val = phy->ops.read_reg(hw, + IGP01E1000_PHY_PORT_CONFIG, + &data); + if (ret_val) + return ret_val; + + data |= IGP01E1000_PSCFR_SMART_SPEED; + ret_val = phy->ops.write_reg(hw, + IGP01E1000_PHY_PORT_CONFIG, + data); + if (ret_val) + return ret_val; + } else if (phy->smart_speed == e1000_smart_speed_off) { + ret_val = phy->ops.read_reg(hw, + IGP01E1000_PHY_PORT_CONFIG, + &data); + if (ret_val) + return ret_val; + + data &= ~IGP01E1000_PSCFR_SMART_SPEED; + ret_val = phy->ops.write_reg(hw, + IGP01E1000_PHY_PORT_CONFIG, + data); + if (ret_val) + return ret_val; + } + } else if ((phy->autoneg_advertised == E1000_ALL_SPEED_DUPLEX) || + (phy->autoneg_advertised == E1000_ALL_NOT_GIG) || + (phy->autoneg_advertised == E1000_ALL_10_SPEED)) { + data |= IGP02E1000_PM_D3_LPLU; + ret_val = phy->ops.write_reg(hw, IGP02E1000_PHY_POWER_MGMT, + data); + if (ret_val) + return ret_val; + + /* When LPLU is enabled, we should disable SmartSpeed */ + ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_CONFIG, + &data); + if (ret_val) + return ret_val; + + data &= ~IGP01E1000_PSCFR_SMART_SPEED; + ret_val = phy->ops.write_reg(hw, IGP01E1000_PHY_PORT_CONFIG, + data); + } + + return ret_val; +} + +/** + * e1000_check_downshift_generic - Checks whether a downshift in speed occurred + * @hw: pointer to the HW structure + * + * Success returns 0, Failure returns 1 + * + * A downshift is detected by querying the PHY link health. + **/ +s32 e1000_check_downshift_generic(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 phy_data, offset, mask; + + DEBUGFUNC("e1000_check_downshift_generic"); + + switch (phy->type) { + case e1000_phy_i210: + case e1000_phy_m88: + case e1000_phy_gg82563: + offset = M88E1000_PHY_SPEC_STATUS; + mask = M88E1000_PSSR_DOWNSHIFT; + break; + case e1000_phy_igp_2: + case e1000_phy_igp_3: + offset = IGP01E1000_PHY_LINK_HEALTH; + mask = IGP01E1000_PLHR_SS_DOWNGRADE; + break; + default: + /* speed downshift not supported */ + phy->speed_downgraded = false; + return E1000_SUCCESS; + } + + ret_val = phy->ops.read_reg(hw, offset, &phy_data); + + if (!ret_val) + phy->speed_downgraded = !!(phy_data & mask); + + return ret_val; +} + +/** + * e1000_check_polarity_m88 - Checks the polarity. + * @hw: pointer to the HW structure + * + * Success returns 0, Failure returns -E1000_ERR_PHY (-2) + * + * Polarity is determined based on the PHY specific status register. + **/ +s32 e1000_check_polarity_m88(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 data; + + DEBUGFUNC("e1000_check_polarity_m88"); + + ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_STATUS, &data); + + if (!ret_val) + phy->cable_polarity = ((data & M88E1000_PSSR_REV_POLARITY) + ? e1000_rev_polarity_reversed + : e1000_rev_polarity_normal); + + return ret_val; +} + +/** + * e1000_check_polarity_igp - Checks the polarity. + * @hw: pointer to the HW structure + * + * Success returns 0, Failure returns -E1000_ERR_PHY (-2) + * + * Polarity is determined based on the PHY port status register, and the + * current speed (since there is no polarity at 100Mbps). + **/ +s32 e1000_check_polarity_igp(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 data, offset, mask; + + DEBUGFUNC("e1000_check_polarity_igp"); + + /* Polarity is determined based on the speed of + * our connection. + */ + ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_STATUS, &data); + if (ret_val) + return ret_val; + + if ((data & IGP01E1000_PSSR_SPEED_MASK) == + IGP01E1000_PSSR_SPEED_1000MBPS) { + offset = IGP01E1000_PHY_PCS_INIT_REG; + mask = IGP01E1000_PHY_POLARITY_MASK; + } else { + /* This really only applies to 10Mbps since + * there is no polarity for 100Mbps (always 0). + */ + offset = IGP01E1000_PHY_PORT_STATUS; + mask = IGP01E1000_PSSR_POLARITY_REVERSED; + } + + ret_val = phy->ops.read_reg(hw, offset, &data); + + if (!ret_val) + phy->cable_polarity = ((data & mask) + ? e1000_rev_polarity_reversed + : e1000_rev_polarity_normal); + + return ret_val; +} + +/** + * e1000_check_polarity_ife - Check cable polarity for IFE PHY + * @hw: pointer to the HW structure + * + * Polarity is determined on the polarity reversal feature being enabled. + **/ +s32 e1000_check_polarity_ife(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 phy_data, offset, mask; + + DEBUGFUNC("e1000_check_polarity_ife"); + + /* Polarity is determined based on the reversal feature being enabled. + */ + if (phy->polarity_correction) { + offset = IFE_PHY_EXTENDED_STATUS_CONTROL; + mask = IFE_PESC_POLARITY_REVERSED; + } else { + offset = IFE_PHY_SPECIAL_CONTROL; + mask = IFE_PSC_FORCE_POLARITY; + } + + ret_val = phy->ops.read_reg(hw, offset, &phy_data); + + if (!ret_val) + phy->cable_polarity = ((phy_data & mask) + ? e1000_rev_polarity_reversed + : e1000_rev_polarity_normal); + + return ret_val; +} + +/** + * e1000_wait_autoneg - Wait for auto-neg completion + * @hw: pointer to the HW structure + * + * Waits for auto-negotiation to complete or for the auto-negotiation time + * limit to expire, which ever happens first. + **/ +static s32 e1000_wait_autoneg(struct e1000_hw *hw) +{ + s32 ret_val = E1000_SUCCESS; + u16 i, phy_status; + + DEBUGFUNC("e1000_wait_autoneg"); + + if (!hw->phy.ops.read_reg) + return E1000_SUCCESS; + + /* Break after autoneg completes or PHY_AUTO_NEG_LIMIT expires. */ + for (i = PHY_AUTO_NEG_LIMIT; i > 0; i--) { + ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status); + if (ret_val) + break; + ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status); + if (ret_val) + break; + if (phy_status & MII_SR_AUTONEG_COMPLETE) + break; + msec_delay(100); + } + + /* PHY_AUTO_NEG_TIME expiration doesn't guarantee auto-negotiation + * has completed. + */ + return ret_val; +} + +/** + * e1000_phy_has_link_generic - Polls PHY for link + * @hw: pointer to the HW structure + * @iterations: number of times to poll for link + * @usec_interval: delay between polling attempts + * @success: pointer to whether polling was successful or not + * + * Polls the PHY status register for link, 'iterations' number of times. + **/ +s32 e1000_phy_has_link_generic(struct e1000_hw *hw, u32 iterations, + u32 usec_interval, bool *success) +{ + s32 ret_val = E1000_SUCCESS; + u16 i, phy_status; + + DEBUGFUNC("e1000_phy_has_link_generic"); + + if (!hw->phy.ops.read_reg) + return E1000_SUCCESS; + + for (i = 0; i < iterations; i++) { + /* Some PHYs require the PHY_STATUS register to be read + * twice due to the link bit being sticky. No harm doing + * it across the board. + */ + ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status); + if (ret_val) + /* If the first read fails, another entity may have + * ownership of the resources, wait and try again to + * see if they have relinquished the resources yet. + */ + usec_delay(usec_interval); + ret_val = hw->phy.ops.read_reg(hw, PHY_STATUS, &phy_status); + if (ret_val) + break; + if (phy_status & MII_SR_LINK_STATUS) + break; + if (usec_interval >= 1000) + msec_delay_irq(usec_interval/1000); + else + usec_delay(usec_interval); + } + + *success = (i < iterations); + + return ret_val; +} + +/** + * e1000_get_cable_length_m88 - Determine cable length for m88 PHY + * @hw: pointer to the HW structure + * + * Reads the PHY specific status register to retrieve the cable length + * information. The cable length is determined by averaging the minimum and + * maximum values to get the "average" cable length. The m88 PHY has four + * possible cable length values, which are: + * Register Value Cable Length + * 0 < 50 meters + * 1 50 - 80 meters + * 2 80 - 110 meters + * 3 110 - 140 meters + * 4 > 140 meters + **/ +s32 e1000_get_cable_length_m88(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 phy_data, index; + + DEBUGFUNC("e1000_get_cable_length_m88"); + + ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data); + if (ret_val) + return ret_val; + + index = ((phy_data & M88E1000_PSSR_CABLE_LENGTH) >> + M88E1000_PSSR_CABLE_LENGTH_SHIFT); + + if (index >= M88E1000_CABLE_LENGTH_TABLE_SIZE - 1) + return -E1000_ERR_PHY; + + phy->min_cable_length = e1000_m88_cable_length_table[index]; + phy->max_cable_length = e1000_m88_cable_length_table[index + 1]; + + phy->cable_length = (phy->min_cable_length + phy->max_cable_length) / 2; + + return E1000_SUCCESS; +} + +s32 e1000_get_cable_length_m88_gen2(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 phy_data, phy_data2, is_cm; + u16 index, default_page; + + DEBUGFUNC("e1000_get_cable_length_m88_gen2"); + + switch (hw->phy.id) { + case I210_I_PHY_ID: + /* Get cable length from PHY Cable Diagnostics Control Reg */ + ret_val = phy->ops.read_reg(hw, (0x7 << GS40G_PAGE_SHIFT) + + (I347AT4_PCDL + phy->addr), + &phy_data); + if (ret_val) + return ret_val; + + /* Check if the unit of cable length is meters or cm */ + ret_val = phy->ops.read_reg(hw, (0x7 << GS40G_PAGE_SHIFT) + + I347AT4_PCDC, &phy_data2); + if (ret_val) + return ret_val; + + is_cm = !(phy_data2 & I347AT4_PCDC_CABLE_LENGTH_UNIT); + + /* Populate the phy structure with cable length in meters */ + phy->min_cable_length = phy_data / (is_cm ? 100 : 1); + phy->max_cable_length = phy_data / (is_cm ? 100 : 1); + phy->cable_length = phy_data / (is_cm ? 100 : 1); + break; + case M88E1543_E_PHY_ID: + case M88E1340M_E_PHY_ID: + case I347AT4_E_PHY_ID: + /* Remember the original page select and set it to 7 */ + ret_val = phy->ops.read_reg(hw, I347AT4_PAGE_SELECT, + &default_page); + if (ret_val) + return ret_val; + + ret_val = phy->ops.write_reg(hw, I347AT4_PAGE_SELECT, 0x07); + if (ret_val) + return ret_val; + + /* Get cable length from PHY Cable Diagnostics Control Reg */ + ret_val = phy->ops.read_reg(hw, (I347AT4_PCDL + phy->addr), + &phy_data); + if (ret_val) + return ret_val; + + /* Check if the unit of cable length is meters or cm */ + ret_val = phy->ops.read_reg(hw, I347AT4_PCDC, &phy_data2); + if (ret_val) + return ret_val; + + is_cm = !(phy_data2 & I347AT4_PCDC_CABLE_LENGTH_UNIT); + + /* Populate the phy structure with cable length in meters */ + phy->min_cable_length = phy_data / (is_cm ? 100 : 1); + phy->max_cable_length = phy_data / (is_cm ? 100 : 1); + phy->cable_length = phy_data / (is_cm ? 100 : 1); + + /* Reset the page select to its original value */ + ret_val = phy->ops.write_reg(hw, I347AT4_PAGE_SELECT, + default_page); + if (ret_val) + return ret_val; + break; + + case M88E1112_E_PHY_ID: + /* Remember the original page select and set it to 5 */ + ret_val = phy->ops.read_reg(hw, I347AT4_PAGE_SELECT, + &default_page); + if (ret_val) + return ret_val; + + ret_val = phy->ops.write_reg(hw, I347AT4_PAGE_SELECT, 0x05); + if (ret_val) + return ret_val; + + ret_val = phy->ops.read_reg(hw, M88E1112_VCT_DSP_DISTANCE, + &phy_data); + if (ret_val) + return ret_val; + + index = (phy_data & M88E1000_PSSR_CABLE_LENGTH) >> + M88E1000_PSSR_CABLE_LENGTH_SHIFT; + + if (index >= M88E1000_CABLE_LENGTH_TABLE_SIZE - 1) + return -E1000_ERR_PHY; + + phy->min_cable_length = e1000_m88_cable_length_table[index]; + phy->max_cable_length = e1000_m88_cable_length_table[index + 1]; + + phy->cable_length = (phy->min_cable_length + + phy->max_cable_length) / 2; + + /* Reset the page select to its original value */ + ret_val = phy->ops.write_reg(hw, I347AT4_PAGE_SELECT, + default_page); + if (ret_val) + return ret_val; + + break; + default: + return -E1000_ERR_PHY; + } + + return ret_val; +} + +/** + * e1000_get_cable_length_igp_2 - Determine cable length for igp2 PHY + * @hw: pointer to the HW structure + * + * The automatic gain control (agc) normalizes the amplitude of the + * received signal, adjusting for the attenuation produced by the + * cable. By reading the AGC registers, which represent the + * combination of coarse and fine gain value, the value can be put + * into a lookup table to obtain the approximate cable length + * for each channel. + **/ +s32 e1000_get_cable_length_igp_2(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 phy_data, i, agc_value = 0; + u16 cur_agc_index, max_agc_index = 0; + u16 min_agc_index = IGP02E1000_CABLE_LENGTH_TABLE_SIZE - 1; + static const u16 agc_reg_array[IGP02E1000_PHY_CHANNEL_NUM] = { + IGP02E1000_PHY_AGC_A, + IGP02E1000_PHY_AGC_B, + IGP02E1000_PHY_AGC_C, + IGP02E1000_PHY_AGC_D + }; + + DEBUGFUNC("e1000_get_cable_length_igp_2"); + + /* Read the AGC registers for all channels */ + for (i = 0; i < IGP02E1000_PHY_CHANNEL_NUM; i++) { + ret_val = phy->ops.read_reg(hw, agc_reg_array[i], &phy_data); + if (ret_val) + return ret_val; + + /* Getting bits 15:9, which represent the combination of + * coarse and fine gain values. The result is a number + * that can be put into the lookup table to obtain the + * approximate cable length. + */ + cur_agc_index = ((phy_data >> IGP02E1000_AGC_LENGTH_SHIFT) & + IGP02E1000_AGC_LENGTH_MASK); + + /* Array index bound check. */ + if ((cur_agc_index >= IGP02E1000_CABLE_LENGTH_TABLE_SIZE) || + (cur_agc_index == 0)) + return -E1000_ERR_PHY; + + /* Remove min & max AGC values from calculation. */ + if (e1000_igp_2_cable_length_table[min_agc_index] > + e1000_igp_2_cable_length_table[cur_agc_index]) + min_agc_index = cur_agc_index; + if (e1000_igp_2_cable_length_table[max_agc_index] < + e1000_igp_2_cable_length_table[cur_agc_index]) + max_agc_index = cur_agc_index; + + agc_value += e1000_igp_2_cable_length_table[cur_agc_index]; + } + + agc_value -= (e1000_igp_2_cable_length_table[min_agc_index] + + e1000_igp_2_cable_length_table[max_agc_index]); + agc_value /= (IGP02E1000_PHY_CHANNEL_NUM - 2); + + /* Calculate cable length with the error range of +/- 10 meters. */ + phy->min_cable_length = (((agc_value - IGP02E1000_AGC_RANGE) > 0) ? + (agc_value - IGP02E1000_AGC_RANGE) : 0); + phy->max_cable_length = agc_value + IGP02E1000_AGC_RANGE; + + phy->cable_length = (phy->min_cable_length + phy->max_cable_length) / 2; + + return E1000_SUCCESS; +} + +/** + * e1000_get_phy_info_m88 - Retrieve PHY information + * @hw: pointer to the HW structure + * + * Valid for only copper links. Read the PHY status register (sticky read) + * to verify that link is up. Read the PHY special control register to + * determine the polarity and 10base-T extended distance. Read the PHY + * special status register to determine MDI/MDIx and current speed. If + * speed is 1000, then determine cable length, local and remote receiver. + **/ +s32 e1000_get_phy_info_m88(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 phy_data; + bool link; + + DEBUGFUNC("e1000_get_phy_info_m88"); + + if (phy->media_type != e1000_media_type_copper) { + DEBUGOUT("Phy info is only valid for copper media\n"); + return -E1000_ERR_CONFIG; + } + + ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link); + if (ret_val) + return ret_val; + + if (!link) { + DEBUGOUT("Phy info is only valid if link is up\n"); + return -E1000_ERR_CONFIG; + } + + ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_CTRL, &phy_data); + if (ret_val) + return ret_val; + + phy->polarity_correction = !!(phy_data & + M88E1000_PSCR_POLARITY_REVERSAL); + + ret_val = e1000_check_polarity_m88(hw); + if (ret_val) + return ret_val; + + ret_val = phy->ops.read_reg(hw, M88E1000_PHY_SPEC_STATUS, &phy_data); + if (ret_val) + return ret_val; + + phy->is_mdix = !!(phy_data & M88E1000_PSSR_MDIX); + + if ((phy_data & M88E1000_PSSR_SPEED) == M88E1000_PSSR_1000MBS) { + ret_val = hw->phy.ops.get_cable_length(hw); + if (ret_val) + return ret_val; + + ret_val = phy->ops.read_reg(hw, PHY_1000T_STATUS, &phy_data); + if (ret_val) + return ret_val; + + phy->local_rx = (phy_data & SR_1000T_LOCAL_RX_STATUS) + ? e1000_1000t_rx_status_ok + : e1000_1000t_rx_status_not_ok; + + phy->remote_rx = (phy_data & SR_1000T_REMOTE_RX_STATUS) + ? e1000_1000t_rx_status_ok + : e1000_1000t_rx_status_not_ok; + } else { + /* Set values to "undefined" */ + phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED; + phy->local_rx = e1000_1000t_rx_status_undefined; + phy->remote_rx = e1000_1000t_rx_status_undefined; + } + + return ret_val; +} + +/** + * e1000_get_phy_info_igp - Retrieve igp PHY information + * @hw: pointer to the HW structure + * + * Read PHY status to determine if link is up. If link is up, then + * set/determine 10base-T extended distance and polarity correction. Read + * PHY port status to determine MDI/MDIx and speed. Based on the speed, + * determine on the cable length, local and remote receiver. + **/ +s32 e1000_get_phy_info_igp(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 data; + bool link; + + DEBUGFUNC("e1000_get_phy_info_igp"); + + ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link); + if (ret_val) + return ret_val; + + if (!link) { + DEBUGOUT("Phy info is only valid if link is up\n"); + return -E1000_ERR_CONFIG; + } + + phy->polarity_correction = true; + + ret_val = e1000_check_polarity_igp(hw); + if (ret_val) + return ret_val; + + ret_val = phy->ops.read_reg(hw, IGP01E1000_PHY_PORT_STATUS, &data); + if (ret_val) + return ret_val; + + phy->is_mdix = !!(data & IGP01E1000_PSSR_MDIX); + + if ((data & IGP01E1000_PSSR_SPEED_MASK) == + IGP01E1000_PSSR_SPEED_1000MBPS) { + ret_val = phy->ops.get_cable_length(hw); + if (ret_val) + return ret_val; + + ret_val = phy->ops.read_reg(hw, PHY_1000T_STATUS, &data); + if (ret_val) + return ret_val; + + phy->local_rx = (data & SR_1000T_LOCAL_RX_STATUS) + ? e1000_1000t_rx_status_ok + : e1000_1000t_rx_status_not_ok; + + phy->remote_rx = (data & SR_1000T_REMOTE_RX_STATUS) + ? e1000_1000t_rx_status_ok + : e1000_1000t_rx_status_not_ok; + } else { + phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED; + phy->local_rx = e1000_1000t_rx_status_undefined; + phy->remote_rx = e1000_1000t_rx_status_undefined; + } + + return ret_val; +} + +/** + * e1000_get_phy_info_ife - Retrieves various IFE PHY states + * @hw: pointer to the HW structure + * + * Populates "phy" structure with various feature states. + **/ +s32 e1000_get_phy_info_ife(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 data; + bool link; + + DEBUGFUNC("e1000_get_phy_info_ife"); + + ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link); + if (ret_val) + return ret_val; + + if (!link) { + DEBUGOUT("Phy info is only valid if link is up\n"); + return -E1000_ERR_CONFIG; + } + + ret_val = phy->ops.read_reg(hw, IFE_PHY_SPECIAL_CONTROL, &data); + if (ret_val) + return ret_val; + phy->polarity_correction = !(data & IFE_PSC_AUTO_POLARITY_DISABLE); + + if (phy->polarity_correction) { + ret_val = e1000_check_polarity_ife(hw); + if (ret_val) + return ret_val; + } else { + /* Polarity is forced */ + phy->cable_polarity = ((data & IFE_PSC_FORCE_POLARITY) + ? e1000_rev_polarity_reversed + : e1000_rev_polarity_normal); + } + + ret_val = phy->ops.read_reg(hw, IFE_PHY_MDIX_CONTROL, &data); + if (ret_val) + return ret_val; + + phy->is_mdix = !!(data & IFE_PMC_MDIX_STATUS); + + /* The following parameters are undefined for 10/100 operation. */ + phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED; + phy->local_rx = e1000_1000t_rx_status_undefined; + phy->remote_rx = e1000_1000t_rx_status_undefined; + + return E1000_SUCCESS; +} + +/** + * e1000_phy_sw_reset_generic - PHY software reset + * @hw: pointer to the HW structure + * + * Does a software reset of the PHY by reading the PHY control register and + * setting/write the control register reset bit to the PHY. + **/ +s32 e1000_phy_sw_reset_generic(struct e1000_hw *hw) +{ + s32 ret_val; + u16 phy_ctrl; + + DEBUGFUNC("e1000_phy_sw_reset_generic"); + + if (!hw->phy.ops.read_reg) + return E1000_SUCCESS; + + ret_val = hw->phy.ops.read_reg(hw, PHY_CONTROL, &phy_ctrl); + if (ret_val) + return ret_val; + + phy_ctrl |= MII_CR_RESET; + ret_val = hw->phy.ops.write_reg(hw, PHY_CONTROL, phy_ctrl); + if (ret_val) + return ret_val; + + usec_delay(1); + + return ret_val; +} + +/** + * e1000_phy_hw_reset_generic - PHY hardware reset + * @hw: pointer to the HW structure + * + * Verify the reset block is not blocking us from resetting. Acquire + * semaphore (if necessary) and read/set/write the device control reset + * bit in the PHY. Wait the appropriate delay time for the device to + * reset and release the semaphore (if necessary). + **/ +s32 e1000_phy_hw_reset_generic(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u32 ctrl; + + DEBUGFUNC("e1000_phy_hw_reset_generic"); + + if (phy->ops.check_reset_block) { + ret_val = phy->ops.check_reset_block(hw); + if (ret_val) + return E1000_SUCCESS; + } + + ret_val = phy->ops.acquire(hw); + if (ret_val) + return ret_val; + + ctrl = E1000_READ_REG(hw, E1000_CTRL); + E1000_WRITE_REG(hw, E1000_CTRL, ctrl | E1000_CTRL_PHY_RST); + E1000_WRITE_FLUSH(hw); + + usec_delay(phy->reset_delay_us); + + E1000_WRITE_REG(hw, E1000_CTRL, ctrl); + E1000_WRITE_FLUSH(hw); + + usec_delay(150); + + phy->ops.release(hw); + + return phy->ops.get_cfg_done(hw); +} + +/** + * e1000_get_cfg_done_generic - Generic configuration done + * @hw: pointer to the HW structure + * + * Generic function to wait 10 milli-seconds for configuration to complete + * and return success. + **/ +s32 e1000_get_cfg_done_generic(struct e1000_hw E1000_UNUSEDARG *hw) +{ + DEBUGFUNC("e1000_get_cfg_done_generic"); + + msec_delay_irq(10); + + return E1000_SUCCESS; +} + +/** + * e1000_phy_init_script_igp3 - Inits the IGP3 PHY + * @hw: pointer to the HW structure + * + * Initializes a Intel Gigabit PHY3 when an EEPROM is not present. + **/ +s32 e1000_phy_init_script_igp3(struct e1000_hw *hw) +{ + DEBUGOUT("Running IGP 3 PHY init script\n"); + + /* PHY init IGP 3 */ + /* Enable rise/fall, 10-mode work in class-A */ + hw->phy.ops.write_reg(hw, 0x2F5B, 0x9018); + /* Remove all caps from Replica path filter */ + hw->phy.ops.write_reg(hw, 0x2F52, 0x0000); + /* Bias trimming for ADC, AFE and Driver (Default) */ + hw->phy.ops.write_reg(hw, 0x2FB1, 0x8B24); + /* Increase Hybrid poly bias */ + hw->phy.ops.write_reg(hw, 0x2FB2, 0xF8F0); + /* Add 4% to Tx amplitude in Gig mode */ + hw->phy.ops.write_reg(hw, 0x2010, 0x10B0); + /* Disable trimming (TTT) */ + hw->phy.ops.write_reg(hw, 0x2011, 0x0000); + /* Poly DC correction to 94.6% + 2% for all channels */ + hw->phy.ops.write_reg(hw, 0x20DD, 0x249A); + /* ABS DC correction to 95.9% */ + hw->phy.ops.write_reg(hw, 0x20DE, 0x00D3); + /* BG temp curve trim */ + hw->phy.ops.write_reg(hw, 0x28B4, 0x04CE); + /* Increasing ADC OPAMP stage 1 currents to max */ + hw->phy.ops.write_reg(hw, 0x2F70, 0x29E4); + /* Force 1000 ( required for enabling PHY regs configuration) */ + hw->phy.ops.write_reg(hw, 0x0000, 0x0140); + /* Set upd_freq to 6 */ + hw->phy.ops.write_reg(hw, 0x1F30, 0x1606); + /* Disable NPDFE */ + hw->phy.ops.write_reg(hw, 0x1F31, 0xB814); + /* Disable adaptive fixed FFE (Default) */ + hw->phy.ops.write_reg(hw, 0x1F35, 0x002A); + /* Enable FFE hysteresis */ + hw->phy.ops.write_reg(hw, 0x1F3E, 0x0067); + /* Fixed FFE for short cable lengths */ + hw->phy.ops.write_reg(hw, 0x1F54, 0x0065); + /* Fixed FFE for medium cable lengths */ + hw->phy.ops.write_reg(hw, 0x1F55, 0x002A); + /* Fixed FFE for long cable lengths */ + hw->phy.ops.write_reg(hw, 0x1F56, 0x002A); + /* Enable Adaptive Clip Threshold */ + hw->phy.ops.write_reg(hw, 0x1F72, 0x3FB0); + /* AHT reset limit to 1 */ + hw->phy.ops.write_reg(hw, 0x1F76, 0xC0FF); + /* Set AHT master delay to 127 msec */ + hw->phy.ops.write_reg(hw, 0x1F77, 0x1DEC); + /* Set scan bits for AHT */ + hw->phy.ops.write_reg(hw, 0x1F78, 0xF9EF); + /* Set AHT Preset bits */ + hw->phy.ops.write_reg(hw, 0x1F79, 0x0210); + /* Change integ_factor of channel A to 3 */ + hw->phy.ops.write_reg(hw, 0x1895, 0x0003); + /* Change prop_factor of channels BCD to 8 */ + hw->phy.ops.write_reg(hw, 0x1796, 0x0008); + /* Change cg_icount + enable integbp for channels BCD */ + hw->phy.ops.write_reg(hw, 0x1798, 0xD008); + /* Change cg_icount + enable integbp + change prop_factor_master + * to 8 for channel A + */ + hw->phy.ops.write_reg(hw, 0x1898, 0xD918); + /* Disable AHT in Slave mode on channel A */ + hw->phy.ops.write_reg(hw, 0x187A, 0x0800); + /* Enable LPLU and disable AN to 1000 in non-D0a states, + * Enable SPD+B2B + */ + hw->phy.ops.write_reg(hw, 0x0019, 0x008D); + /* Enable restart AN on an1000_dis change */ + hw->phy.ops.write_reg(hw, 0x001B, 0x2080); + /* Enable wh_fifo read clock in 10/100 modes */ + hw->phy.ops.write_reg(hw, 0x0014, 0x0045); + /* Restart AN, Speed selection is 1000 */ + hw->phy.ops.write_reg(hw, 0x0000, 0x1340); + + return E1000_SUCCESS; +} + +/** + * e1000_get_phy_type_from_id - Get PHY type from id + * @phy_id: phy_id read from the phy + * + * Returns the phy type from the id. + **/ +enum e1000_phy_type e1000_get_phy_type_from_id(u32 phy_id) +{ + enum e1000_phy_type phy_type = e1000_phy_unknown; + + switch (phy_id) { + case M88E1000_I_PHY_ID: + case M88E1000_E_PHY_ID: + case M88E1111_I_PHY_ID: + case M88E1011_I_PHY_ID: + case M88E1543_E_PHY_ID: + case I347AT4_E_PHY_ID: + case M88E1112_E_PHY_ID: + case M88E1340M_E_PHY_ID: + phy_type = e1000_phy_m88; + break; + case IGP01E1000_I_PHY_ID: /* IGP 1 & 2 share this */ + phy_type = e1000_phy_igp_2; + break; + case GG82563_E_PHY_ID: + phy_type = e1000_phy_gg82563; + break; + case IGP03E1000_E_PHY_ID: + phy_type = e1000_phy_igp_3; + break; + case IFE_E_PHY_ID: + case IFE_PLUS_E_PHY_ID: + case IFE_C_E_PHY_ID: + phy_type = e1000_phy_ife; + break; + case I82580_I_PHY_ID: + phy_type = e1000_phy_82580; + break; + case I210_I_PHY_ID: + phy_type = e1000_phy_i210; + break; + default: + phy_type = e1000_phy_unknown; + break; + } + return phy_type; +} + +/** + * e1000_determine_phy_address - Determines PHY address. + * @hw: pointer to the HW structure + * + * This uses a trial and error method to loop through possible PHY + * addresses. It tests each by reading the PHY ID registers and + * checking for a match. + **/ +s32 e1000_determine_phy_address(struct e1000_hw *hw) +{ + u32 phy_addr = 0; + u32 i; + enum e1000_phy_type phy_type = e1000_phy_unknown; + + hw->phy.id = phy_type; + + for (phy_addr = 0; phy_addr < E1000_MAX_PHY_ADDR; phy_addr++) { + hw->phy.addr = phy_addr; + i = 0; + + do { + e1000_get_phy_id(hw); + phy_type = e1000_get_phy_type_from_id(hw->phy.id); + + /* If phy_type is valid, break - we found our + * PHY address + */ + if (phy_type != e1000_phy_unknown) + return E1000_SUCCESS; + + msec_delay(1); + i++; + } while (i < 10); + } + + return -E1000_ERR_PHY_TYPE; +} + +/** + * e1000_power_up_phy_copper - Restore copper link in case of PHY power down + * @hw: pointer to the HW structure + * + * In the case of a PHY power down to save power, or to turn off link during a + * driver unload, or wake on lan is not enabled, restore the link to previous + * settings. + **/ +void e1000_power_up_phy_copper(struct e1000_hw *hw) +{ + u16 mii_reg = 0; + u16 power_reg = 0; + + /* The PHY will retain its settings across a power down/up cycle */ + hw->phy.ops.read_reg(hw, PHY_CONTROL, &mii_reg); + mii_reg &= ~MII_CR_POWER_DOWN; + if (hw->phy.type == e1000_phy_i210) { + hw->phy.ops.read_reg(hw, GS40G_COPPER_SPEC, &power_reg); + power_reg &= ~GS40G_CS_POWER_DOWN; + hw->phy.ops.write_reg(hw, GS40G_COPPER_SPEC, power_reg); + } + hw->phy.ops.write_reg(hw, PHY_CONTROL, mii_reg); +} + +/** + * e1000_power_down_phy_copper - Restore copper link in case of PHY power down + * @hw: pointer to the HW structure + * + * In the case of a PHY power down to save power, or to turn off link during a + * driver unload, or wake on lan is not enabled, restore the link to previous + * settings. + **/ +void e1000_power_down_phy_copper(struct e1000_hw *hw) +{ + u16 mii_reg = 0; + u16 power_reg = 0; + + /* The PHY will retain its settings across a power down/up cycle */ + hw->phy.ops.read_reg(hw, PHY_CONTROL, &mii_reg); + mii_reg |= MII_CR_POWER_DOWN; + /* i210 Phy requires an additional bit for power up/down */ + if (hw->phy.type == e1000_phy_i210) { + hw->phy.ops.read_reg(hw, GS40G_COPPER_SPEC, &power_reg); + power_reg |= GS40G_CS_POWER_DOWN; + hw->phy.ops.write_reg(hw, GS40G_COPPER_SPEC, power_reg); + } + hw->phy.ops.write_reg(hw, PHY_CONTROL, mii_reg); + msec_delay(1); +} + +/** + * e1000_check_polarity_82577 - Checks the polarity. + * @hw: pointer to the HW structure + * + * Success returns 0, Failure returns -E1000_ERR_PHY (-2) + * + * Polarity is determined based on the PHY specific status register. + **/ +s32 e1000_check_polarity_82577(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 data; + + DEBUGFUNC("e1000_check_polarity_82577"); + + ret_val = phy->ops.read_reg(hw, I82577_PHY_STATUS_2, &data); + + if (!ret_val) + phy->cable_polarity = ((data & I82577_PHY_STATUS2_REV_POLARITY) + ? e1000_rev_polarity_reversed + : e1000_rev_polarity_normal); + + return ret_val; +} + +/** + * e1000_phy_force_speed_duplex_82577 - Force speed/duplex for I82577 PHY + * @hw: pointer to the HW structure + * + * Calls the PHY setup function to force speed and duplex. + **/ +s32 e1000_phy_force_speed_duplex_82577(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 phy_data; + bool link; + + DEBUGFUNC("e1000_phy_force_speed_duplex_82577"); + + ret_val = phy->ops.read_reg(hw, PHY_CONTROL, &phy_data); + if (ret_val) + return ret_val; + + e1000_phy_force_speed_duplex_setup(hw, &phy_data); + + ret_val = phy->ops.write_reg(hw, PHY_CONTROL, phy_data); + if (ret_val) + return ret_val; + + usec_delay(1); + + if (phy->autoneg_wait_to_complete) { + DEBUGOUT("Waiting for forced speed/duplex link on 82577 phy\n"); + + ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT, + 100000, &link); + if (ret_val) + return ret_val; + + if (!link) + DEBUGOUT("Link taking longer than expected.\n"); + + /* Try once more */ + ret_val = e1000_phy_has_link_generic(hw, PHY_FORCE_LIMIT, + 100000, &link); + } + + return ret_val; +} + +/** + * e1000_get_phy_info_82577 - Retrieve I82577 PHY information + * @hw: pointer to the HW structure + * + * Read PHY status to determine if link is up. If link is up, then + * set/determine 10base-T extended distance and polarity correction. Read + * PHY port status to determine MDI/MDIx and speed. Based on the speed, + * determine on the cable length, local and remote receiver. + **/ +s32 e1000_get_phy_info_82577(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 data; + bool link; + + DEBUGFUNC("e1000_get_phy_info_82577"); + + ret_val = e1000_phy_has_link_generic(hw, 1, 0, &link); + if (ret_val) + return ret_val; + + if (!link) { + DEBUGOUT("Phy info is only valid if link is up\n"); + return -E1000_ERR_CONFIG; + } + + phy->polarity_correction = true; + + ret_val = e1000_check_polarity_82577(hw); + if (ret_val) + return ret_val; + + ret_val = phy->ops.read_reg(hw, I82577_PHY_STATUS_2, &data); + if (ret_val) + return ret_val; + + phy->is_mdix = !!(data & I82577_PHY_STATUS2_MDIX); + + if ((data & I82577_PHY_STATUS2_SPEED_MASK) == + I82577_PHY_STATUS2_SPEED_1000MBPS) { + ret_val = hw->phy.ops.get_cable_length(hw); + if (ret_val) + return ret_val; + + ret_val = phy->ops.read_reg(hw, PHY_1000T_STATUS, &data); + if (ret_val) + return ret_val; + + phy->local_rx = (data & SR_1000T_LOCAL_RX_STATUS) + ? e1000_1000t_rx_status_ok + : e1000_1000t_rx_status_not_ok; + + phy->remote_rx = (data & SR_1000T_REMOTE_RX_STATUS) + ? e1000_1000t_rx_status_ok + : e1000_1000t_rx_status_not_ok; + } else { + phy->cable_length = E1000_CABLE_LENGTH_UNDEFINED; + phy->local_rx = e1000_1000t_rx_status_undefined; + phy->remote_rx = e1000_1000t_rx_status_undefined; + } + + return E1000_SUCCESS; +} + +/** + * e1000_get_cable_length_82577 - Determine cable length for 82577 PHY + * @hw: pointer to the HW structure + * + * Reads the diagnostic status register and verifies result is valid before + * placing it in the phy_cable_length field. + **/ +s32 e1000_get_cable_length_82577(struct e1000_hw *hw) +{ + struct e1000_phy_info *phy = &hw->phy; + s32 ret_val; + u16 phy_data, length; + + DEBUGFUNC("e1000_get_cable_length_82577"); + + ret_val = phy->ops.read_reg(hw, I82577_PHY_DIAG_STATUS, &phy_data); + if (ret_val) + return ret_val; + + length = ((phy_data & I82577_DSTATUS_CABLE_LENGTH) >> + I82577_DSTATUS_CABLE_LENGTH_SHIFT); + + if (length == E1000_CABLE_LENGTH_UNDEFINED) + return -E1000_ERR_PHY; + + phy->cable_length = length; + + return E1000_SUCCESS; +} + +/** + * e1000_write_phy_reg_gs40g - Write GS40G PHY register + * @hw: pointer to the HW structure + * @offset: register offset to write to + * @data: data to write at register offset + * + * Acquires semaphore, if necessary, then writes the data to PHY register + * at the offset. Release any acquired semaphores before exiting. + **/ +s32 e1000_write_phy_reg_gs40g(struct e1000_hw *hw, u32 offset, u16 data) +{ + s32 ret_val; + u16 page = offset >> GS40G_PAGE_SHIFT; + + DEBUGFUNC("e1000_write_phy_reg_gs40g"); + + offset = offset & GS40G_OFFSET_MASK; + ret_val = hw->phy.ops.acquire(hw); + if (ret_val) + return ret_val; + + ret_val = e1000_write_phy_reg_mdic(hw, GS40G_PAGE_SELECT, page); + if (ret_val) + goto release; + ret_val = e1000_write_phy_reg_mdic(hw, offset, data); + +release: + hw->phy.ops.release(hw); + return ret_val; +} + +/** + * e1000_read_phy_reg_gs40g - Read GS40G PHY register + * @hw: pointer to the HW structure + * @offset: lower half is register offset to read to + * upper half is page to use. + * @data: data to read at register offset + * + * Acquires semaphore, if necessary, then reads the data in the PHY register + * at the offset. Release any acquired semaphores before exiting. + **/ +s32 e1000_read_phy_reg_gs40g(struct e1000_hw *hw, u32 offset, u16 *data) +{ + s32 ret_val; + u16 page = offset >> GS40G_PAGE_SHIFT; + + DEBUGFUNC("e1000_read_phy_reg_gs40g"); + + offset = offset & GS40G_OFFSET_MASK; + ret_val = hw->phy.ops.acquire(hw); + if (ret_val) + return ret_val; + + ret_val = e1000_write_phy_reg_mdic(hw, GS40G_PAGE_SELECT, page); + if (ret_val) + goto release; + ret_val = e1000_read_phy_reg_mdic(hw, offset, data); + +release: + hw->phy.ops.release(hw); + return ret_val; +} + +/** + * e1000_read_phy_reg_mphy - Read mPHY control register + * @hw: pointer to the HW structure + * @address: address to be read + * @data: pointer to the read data + * + * Reads the mPHY control register in the PHY at offset and stores the + * information read to data. + **/ +s32 e1000_read_phy_reg_mphy(struct e1000_hw *hw, u32 address, u32 *data) +{ + u32 mphy_ctrl = 0; + bool locked = false; + bool ready = false; + + DEBUGFUNC("e1000_read_phy_reg_mphy"); + + /* Check if mPHY is ready to read/write operations */ + ready = e1000_is_mphy_ready(hw); + if (!ready) + return -E1000_ERR_PHY; + + /* Check if mPHY access is disabled and enable it if so */ + mphy_ctrl = E1000_READ_REG(hw, E1000_MPHY_ADDR_CTRL); + if (mphy_ctrl & E1000_MPHY_DIS_ACCESS) { + locked = true; + ready = e1000_is_mphy_ready(hw); + if (!ready) + return -E1000_ERR_PHY; + mphy_ctrl |= E1000_MPHY_ENA_ACCESS; + E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL, mphy_ctrl); + } + + /* Set the address that we want to read */ + ready = e1000_is_mphy_ready(hw); + if (!ready) + return -E1000_ERR_PHY; + + /* We mask address, because we want to use only current lane */ + mphy_ctrl = (mphy_ctrl & ~E1000_MPHY_ADDRESS_MASK & + ~E1000_MPHY_ADDRESS_FNC_OVERRIDE) | + (address & E1000_MPHY_ADDRESS_MASK); + E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL, mphy_ctrl); + + /* Read data from the address */ + ready = e1000_is_mphy_ready(hw); + if (!ready) + return -E1000_ERR_PHY; + *data = E1000_READ_REG(hw, E1000_MPHY_DATA); + + /* Disable access to mPHY if it was originally disabled */ + if (locked) + ready = e1000_is_mphy_ready(hw); + if (!ready) + return -E1000_ERR_PHY; + E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL, + E1000_MPHY_DIS_ACCESS); + + return E1000_SUCCESS; +} + +/** + * e1000_write_phy_reg_mphy - Write mPHY control register + * @hw: pointer to the HW structure + * @address: address to write to + * @data: data to write to register at offset + * @line_override: used when we want to use different line than default one + * + * Writes data to mPHY control register. + **/ +s32 e1000_write_phy_reg_mphy(struct e1000_hw *hw, u32 address, u32 data, + bool line_override) +{ + u32 mphy_ctrl = 0; + bool locked = false; + bool ready = false; + + DEBUGFUNC("e1000_write_phy_reg_mphy"); + + /* Check if mPHY is ready to read/write operations */ + ready = e1000_is_mphy_ready(hw); + if (!ready) + return -E1000_ERR_PHY; + + /* Check if mPHY access is disabled and enable it if so */ + mphy_ctrl = E1000_READ_REG(hw, E1000_MPHY_ADDR_CTRL); + if (mphy_ctrl & E1000_MPHY_DIS_ACCESS) { + locked = true; + ready = e1000_is_mphy_ready(hw); + if (!ready) + return -E1000_ERR_PHY; + mphy_ctrl |= E1000_MPHY_ENA_ACCESS; + E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL, mphy_ctrl); + } + + /* Set the address that we want to read */ + ready = e1000_is_mphy_ready(hw); + if (!ready) + return -E1000_ERR_PHY; + + /* We mask address, because we want to use only current lane */ + if (line_override) + mphy_ctrl |= E1000_MPHY_ADDRESS_FNC_OVERRIDE; + else + mphy_ctrl &= ~E1000_MPHY_ADDRESS_FNC_OVERRIDE; + mphy_ctrl = (mphy_ctrl & ~E1000_MPHY_ADDRESS_MASK) | + (address & E1000_MPHY_ADDRESS_MASK); + E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL, mphy_ctrl); + + /* Read data from the address */ + ready = e1000_is_mphy_ready(hw); + if (!ready) + return -E1000_ERR_PHY; + E1000_WRITE_REG(hw, E1000_MPHY_DATA, data); + + /* Disable access to mPHY if it was originally disabled */ + if (locked) + ready = e1000_is_mphy_ready(hw); + if (!ready) + return -E1000_ERR_PHY; + E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTRL, + E1000_MPHY_DIS_ACCESS); + + return E1000_SUCCESS; +} + +/** + * e1000_is_mphy_ready - Check if mPHY control register is not busy + * @hw: pointer to the HW structure + * + * Returns mPHY control register status. + **/ +bool e1000_is_mphy_ready(struct e1000_hw *hw) +{ + u16 retry_count = 0; + u32 mphy_ctrl = 0; + bool ready = false; + + while (retry_count < 2) { + mphy_ctrl = E1000_READ_REG(hw, E1000_MPHY_ADDR_CTRL); + if (mphy_ctrl & E1000_MPHY_BUSY) { + usec_delay(20); + retry_count++; + continue; + } + ready = true; + break; + } + + if (!ready) + DEBUGOUT("ERROR READING mPHY control register, phy is busy.\n"); + + return ready; +} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.h new file mode 100644 index 00000000..5387c5e7 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_phy.h @@ -0,0 +1,256 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _E1000_PHY_H_ +#define _E1000_PHY_H_ + +void e1000_init_phy_ops_generic(struct e1000_hw *hw); +s32 e1000_null_read_reg(struct e1000_hw *hw, u32 offset, u16 *data); +void e1000_null_phy_generic(struct e1000_hw *hw); +s32 e1000_null_lplu_state(struct e1000_hw *hw, bool active); +s32 e1000_null_write_reg(struct e1000_hw *hw, u32 offset, u16 data); +s32 e1000_null_set_page(struct e1000_hw *hw, u16 data); +s32 e1000_read_i2c_byte_null(struct e1000_hw *hw, u8 byte_offset, + u8 dev_addr, u8 *data); +s32 e1000_write_i2c_byte_null(struct e1000_hw *hw, u8 byte_offset, + u8 dev_addr, u8 data); +s32 e1000_check_downshift_generic(struct e1000_hw *hw); +s32 e1000_check_polarity_m88(struct e1000_hw *hw); +s32 e1000_check_polarity_igp(struct e1000_hw *hw); +s32 e1000_check_polarity_ife(struct e1000_hw *hw); +s32 e1000_check_reset_block_generic(struct e1000_hw *hw); +s32 e1000_copper_link_setup_igp(struct e1000_hw *hw); +s32 e1000_copper_link_setup_m88(struct e1000_hw *hw); +s32 e1000_copper_link_setup_m88_gen2(struct e1000_hw *hw); +s32 e1000_phy_force_speed_duplex_igp(struct e1000_hw *hw); +s32 e1000_phy_force_speed_duplex_m88(struct e1000_hw *hw); +s32 e1000_phy_force_speed_duplex_ife(struct e1000_hw *hw); +s32 e1000_get_cable_length_m88(struct e1000_hw *hw); +s32 e1000_get_cable_length_m88_gen2(struct e1000_hw *hw); +s32 e1000_get_cable_length_igp_2(struct e1000_hw *hw); +s32 e1000_get_cfg_done_generic(struct e1000_hw *hw); +s32 e1000_get_phy_id(struct e1000_hw *hw); +s32 e1000_get_phy_info_igp(struct e1000_hw *hw); +s32 e1000_get_phy_info_m88(struct e1000_hw *hw); +s32 e1000_get_phy_info_ife(struct e1000_hw *hw); +s32 e1000_phy_sw_reset_generic(struct e1000_hw *hw); +void e1000_phy_force_speed_duplex_setup(struct e1000_hw *hw, u16 *phy_ctrl); +s32 e1000_phy_hw_reset_generic(struct e1000_hw *hw); +s32 e1000_phy_reset_dsp_generic(struct e1000_hw *hw); +s32 e1000_read_kmrn_reg_generic(struct e1000_hw *hw, u32 offset, u16 *data); +s32 e1000_read_kmrn_reg_locked(struct e1000_hw *hw, u32 offset, u16 *data); +s32 e1000_set_page_igp(struct e1000_hw *hw, u16 page); +s32 e1000_read_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 *data); +s32 e1000_read_phy_reg_igp_locked(struct e1000_hw *hw, u32 offset, u16 *data); +s32 e1000_read_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 *data); +s32 e1000_set_d3_lplu_state_generic(struct e1000_hw *hw, bool active); +s32 e1000_setup_copper_link_generic(struct e1000_hw *hw); +s32 e1000_write_kmrn_reg_generic(struct e1000_hw *hw, u32 offset, u16 data); +s32 e1000_write_kmrn_reg_locked(struct e1000_hw *hw, u32 offset, u16 data); +s32 e1000_write_phy_reg_igp(struct e1000_hw *hw, u32 offset, u16 data); +s32 e1000_write_phy_reg_igp_locked(struct e1000_hw *hw, u32 offset, u16 data); +s32 e1000_write_phy_reg_m88(struct e1000_hw *hw, u32 offset, u16 data); +s32 e1000_phy_has_link_generic(struct e1000_hw *hw, u32 iterations, + u32 usec_interval, bool *success); +s32 e1000_phy_init_script_igp3(struct e1000_hw *hw); +enum e1000_phy_type e1000_get_phy_type_from_id(u32 phy_id); +s32 e1000_determine_phy_address(struct e1000_hw *hw); +s32 e1000_enable_phy_wakeup_reg_access_bm(struct e1000_hw *hw, u16 *phy_reg); +s32 e1000_disable_phy_wakeup_reg_access_bm(struct e1000_hw *hw, u16 *phy_reg); +void e1000_power_up_phy_copper(struct e1000_hw *hw); +void e1000_power_down_phy_copper(struct e1000_hw *hw); +s32 e1000_read_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 *data); +s32 e1000_write_phy_reg_mdic(struct e1000_hw *hw, u32 offset, u16 data); +s32 e1000_read_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 *data); +s32 e1000_write_phy_reg_i2c(struct e1000_hw *hw, u32 offset, u16 data); +s32 e1000_read_sfp_data_byte(struct e1000_hw *hw, u16 offset, u8 *data); +s32 e1000_write_sfp_data_byte(struct e1000_hw *hw, u16 offset, u8 data); +s32 e1000_copper_link_setup_82577(struct e1000_hw *hw); +s32 e1000_check_polarity_82577(struct e1000_hw *hw); +s32 e1000_get_phy_info_82577(struct e1000_hw *hw); +s32 e1000_phy_force_speed_duplex_82577(struct e1000_hw *hw); +s32 e1000_get_cable_length_82577(struct e1000_hw *hw); +s32 e1000_write_phy_reg_gs40g(struct e1000_hw *hw, u32 offset, u16 data); +s32 e1000_read_phy_reg_gs40g(struct e1000_hw *hw, u32 offset, u16 *data); +s32 e1000_read_phy_reg_mphy(struct e1000_hw *hw, u32 address, u32 *data); +s32 e1000_write_phy_reg_mphy(struct e1000_hw *hw, u32 address, u32 data, + bool line_override); +bool e1000_is_mphy_ready(struct e1000_hw *hw); + +#define E1000_MAX_PHY_ADDR 8 + +/* IGP01E1000 Specific Registers */ +#define IGP01E1000_PHY_PORT_CONFIG 0x10 /* Port Config */ +#define IGP01E1000_PHY_PORT_STATUS 0x11 /* Status */ +#define IGP01E1000_PHY_PORT_CTRL 0x12 /* Control */ +#define IGP01E1000_PHY_LINK_HEALTH 0x13 /* PHY Link Health */ +#define IGP02E1000_PHY_POWER_MGMT 0x19 /* Power Management */ +#define IGP01E1000_PHY_PAGE_SELECT 0x1F /* Page Select */ +#define BM_PHY_PAGE_SELECT 22 /* Page Select for BM */ +#define IGP_PAGE_SHIFT 5 +#define PHY_REG_MASK 0x1F + +/* GS40G - I210 PHY defines */ +#define GS40G_PAGE_SELECT 0x16 +#define GS40G_PAGE_SHIFT 16 +#define GS40G_OFFSET_MASK 0xFFFF +#define GS40G_PAGE_2 0x20000 +#define GS40G_MAC_REG2 0x15 +#define GS40G_MAC_LB 0x4140 +#define GS40G_MAC_SPEED_1G 0X0006 +#define GS40G_COPPER_SPEC 0x0010 +#define GS40G_CS_POWER_DOWN 0x0002 + +#define HV_INTC_FC_PAGE_START 768 +#define I82578_ADDR_REG 29 +#define I82577_ADDR_REG 16 +#define I82577_CFG_REG 22 +#define I82577_CFG_ASSERT_CRS_ON_TX (1 << 15) +#define I82577_CFG_ENABLE_DOWNSHIFT (3 << 10) /* auto downshift */ +#define I82577_CTRL_REG 23 + +/* 82577 specific PHY registers */ +#define I82577_PHY_CTRL_2 18 +#define I82577_PHY_LBK_CTRL 19 +#define I82577_PHY_STATUS_2 26 +#define I82577_PHY_DIAG_STATUS 31 + +/* I82577 PHY Status 2 */ +#define I82577_PHY_STATUS2_REV_POLARITY 0x0400 +#define I82577_PHY_STATUS2_MDIX 0x0800 +#define I82577_PHY_STATUS2_SPEED_MASK 0x0300 +#define I82577_PHY_STATUS2_SPEED_1000MBPS 0x0200 + +/* I82577 PHY Control 2 */ +#define I82577_PHY_CTRL2_MANUAL_MDIX 0x0200 +#define I82577_PHY_CTRL2_AUTO_MDI_MDIX 0x0400 +#define I82577_PHY_CTRL2_MDIX_CFG_MASK 0x0600 + +/* I82577 PHY Diagnostics Status */ +#define I82577_DSTATUS_CABLE_LENGTH 0x03FC +#define I82577_DSTATUS_CABLE_LENGTH_SHIFT 2 + +/* 82580 PHY Power Management */ +#define E1000_82580_PHY_POWER_MGMT 0xE14 +#define E1000_82580_PM_SPD 0x0001 /* Smart Power Down */ +#define E1000_82580_PM_D0_LPLU 0x0002 /* For D0a states */ +#define E1000_82580_PM_D3_LPLU 0x0004 /* For all other states */ +#define E1000_82580_PM_GO_LINKD 0x0020 /* Go Link Disconnect */ + +#define E1000_MPHY_DIS_ACCESS 0x80000000 /* disable_access bit */ +#define E1000_MPHY_ENA_ACCESS 0x40000000 /* enable_access bit */ +#define E1000_MPHY_BUSY 0x00010000 /* busy bit */ +#define E1000_MPHY_ADDRESS_FNC_OVERRIDE 0x20000000 /* fnc_override bit */ +#define E1000_MPHY_ADDRESS_MASK 0x0000FFFF /* address mask */ + +#define IGP01E1000_PHY_PCS_INIT_REG 0x00B4 +#define IGP01E1000_PHY_POLARITY_MASK 0x0078 + +#define IGP01E1000_PSCR_AUTO_MDIX 0x1000 +#define IGP01E1000_PSCR_FORCE_MDI_MDIX 0x2000 /* 0=MDI, 1=MDIX */ + +#define IGP01E1000_PSCFR_SMART_SPEED 0x0080 + +#define IGP02E1000_PM_SPD 0x0001 /* Smart Power Down */ +#define IGP02E1000_PM_D0_LPLU 0x0002 /* For D0a states */ +#define IGP02E1000_PM_D3_LPLU 0x0004 /* For all other states */ + +#define IGP01E1000_PLHR_SS_DOWNGRADE 0x8000 + +#define IGP01E1000_PSSR_POLARITY_REVERSED 0x0002 +#define IGP01E1000_PSSR_MDIX 0x0800 +#define IGP01E1000_PSSR_SPEED_MASK 0xC000 +#define IGP01E1000_PSSR_SPEED_1000MBPS 0xC000 + +#define IGP02E1000_PHY_CHANNEL_NUM 4 +#define IGP02E1000_PHY_AGC_A 0x11B1 +#define IGP02E1000_PHY_AGC_B 0x12B1 +#define IGP02E1000_PHY_AGC_C 0x14B1 +#define IGP02E1000_PHY_AGC_D 0x18B1 + +#define IGP02E1000_AGC_LENGTH_SHIFT 9 /* Course=15:13, Fine=12:9 */ +#define IGP02E1000_AGC_LENGTH_MASK 0x7F +#define IGP02E1000_AGC_RANGE 15 + +#define E1000_CABLE_LENGTH_UNDEFINED 0xFF + +#define E1000_KMRNCTRLSTA_OFFSET 0x001F0000 +#define E1000_KMRNCTRLSTA_OFFSET_SHIFT 16 +#define E1000_KMRNCTRLSTA_REN 0x00200000 +#define E1000_KMRNCTRLSTA_DIAG_OFFSET 0x3 /* Kumeran Diagnostic */ +#define E1000_KMRNCTRLSTA_TIMEOUTS 0x4 /* Kumeran Timeouts */ +#define E1000_KMRNCTRLSTA_INBAND_PARAM 0x9 /* Kumeran InBand Parameters */ +#define E1000_KMRNCTRLSTA_IBIST_DISABLE 0x0200 /* Kumeran IBIST Disable */ +#define E1000_KMRNCTRLSTA_DIAG_NELPBK 0x1000 /* Nearend Loopback mode */ + +#define IFE_PHY_EXTENDED_STATUS_CONTROL 0x10 +#define IFE_PHY_SPECIAL_CONTROL 0x11 /* 100BaseTx PHY Special Ctrl */ +#define IFE_PHY_SPECIAL_CONTROL_LED 0x1B /* PHY Special and LED Ctrl */ +#define IFE_PHY_MDIX_CONTROL 0x1C /* MDI/MDI-X Control */ + +/* IFE PHY Extended Status Control */ +#define IFE_PESC_POLARITY_REVERSED 0x0100 + +/* IFE PHY Special Control */ +#define IFE_PSC_AUTO_POLARITY_DISABLE 0x0010 +#define IFE_PSC_FORCE_POLARITY 0x0020 + +/* IFE PHY Special Control and LED Control */ +#define IFE_PSCL_PROBE_MODE 0x0020 +#define IFE_PSCL_PROBE_LEDS_OFF 0x0006 /* Force LEDs 0 and 2 off */ +#define IFE_PSCL_PROBE_LEDS_ON 0x0007 /* Force LEDs 0 and 2 on */ + +/* IFE PHY MDIX Control */ +#define IFE_PMC_MDIX_STATUS 0x0020 /* 1=MDI-X, 0=MDI */ +#define IFE_PMC_FORCE_MDIX 0x0040 /* 1=force MDI-X, 0=force MDI */ +#define IFE_PMC_AUTO_MDIX 0x0080 /* 1=enable auto, 0=disable */ + +/* SFP modules ID memory locations */ +#define E1000_SFF_IDENTIFIER_OFFSET 0x00 +#define E1000_SFF_IDENTIFIER_SFF 0x02 +#define E1000_SFF_IDENTIFIER_SFP 0x03 + +#define E1000_SFF_ETH_FLAGS_OFFSET 0x06 +/* Flags for SFP modules compatible with ETH up to 1Gb */ +struct sfp_e1000_flags { + u8 e1000_base_sx:1; + u8 e1000_base_lx:1; + u8 e1000_base_cx:1; + u8 e1000_base_t:1; + u8 e100_base_lx:1; + u8 e100_base_fx:1; + u8 e10_base_bx10:1; + u8 e10_base_px:1; +}; + +/* Vendor OUIs: format of OUI is 0x[byte0][byte1][byte2][00] */ +#define E1000_SFF_VENDOR_OUI_TYCO 0x00407600 +#define E1000_SFF_VENDOR_OUI_FTL 0x00906500 +#define E1000_SFF_VENDOR_OUI_AVAGO 0x00176A00 +#define E1000_SFF_VENDOR_OUI_INTEL 0x001B2100 + +#endif diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_regs.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_regs.h new file mode 100644 index 00000000..0e083c54 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/e1000_regs.h @@ -0,0 +1,646 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _E1000_REGS_H_ +#define _E1000_REGS_H_ + +#define E1000_CTRL 0x00000 /* Device Control - RW */ +#define E1000_STATUS 0x00008 /* Device Status - RO */ +#define E1000_EECD 0x00010 /* EEPROM/Flash Control - RW */ +#define E1000_EERD 0x00014 /* EEPROM Read - RW */ +#define E1000_CTRL_EXT 0x00018 /* Extended Device Control - RW */ +#define E1000_FLA 0x0001C /* Flash Access - RW */ +#define E1000_MDIC 0x00020 /* MDI Control - RW */ +#define E1000_MDICNFG 0x00E04 /* MDI Config - RW */ +#define E1000_REGISTER_SET_SIZE 0x20000 /* CSR Size */ +#define E1000_EEPROM_INIT_CTRL_WORD_2 0x0F /* EEPROM Init Ctrl Word 2 */ +#define E1000_EEPROM_PCIE_CTRL_WORD_2 0x28 /* EEPROM PCIe Ctrl Word 2 */ +#define E1000_BARCTRL 0x5BBC /* BAR ctrl reg */ +#define E1000_BARCTRL_FLSIZE 0x0700 /* BAR ctrl Flsize */ +#define E1000_BARCTRL_CSRSIZE 0x2000 /* BAR ctrl CSR size */ +#define E1000_MPHY_ADDR_CTRL 0x0024 /* GbE MPHY Address Control */ +#define E1000_MPHY_DATA 0x0E10 /* GBE MPHY Data */ +#define E1000_MPHY_STAT 0x0E0C /* GBE MPHY Statistics */ +#define E1000_PPHY_CTRL 0x5b48 /* PCIe PHY Control */ +#define E1000_I350_BARCTRL 0x5BFC /* BAR ctrl reg */ +#define E1000_I350_DTXMXPKTSZ 0x355C /* Maximum sent packet size reg*/ +#define E1000_SCTL 0x00024 /* SerDes Control - RW */ +#define E1000_FCAL 0x00028 /* Flow Control Address Low - RW */ +#define E1000_FCAH 0x0002C /* Flow Control Address High -RW */ +#define E1000_FCT 0x00030 /* Flow Control Type - RW */ +#define E1000_CONNSW 0x00034 /* Copper/Fiber switch control - RW */ +#define E1000_VET 0x00038 /* VLAN Ether Type - RW */ +#define E1000_ICR 0x000C0 /* Interrupt Cause Read - R/clr */ +#define E1000_ITR 0x000C4 /* Interrupt Throttling Rate - RW */ +#define E1000_ICS 0x000C8 /* Interrupt Cause Set - WO */ +#define E1000_IMS 0x000D0 /* Interrupt Mask Set - RW */ +#define E1000_IMC 0x000D8 /* Interrupt Mask Clear - WO */ +#define E1000_IAM 0x000E0 /* Interrupt Acknowledge Auto Mask */ +#define E1000_RCTL 0x00100 /* Rx Control - RW */ +#define E1000_FCTTV 0x00170 /* Flow Control Transmit Timer Value - RW */ +#define E1000_TXCW 0x00178 /* Tx Configuration Word - RW */ +#define E1000_RXCW 0x00180 /* Rx Configuration Word - RO */ +#define E1000_EICR 0x01580 /* Ext. Interrupt Cause Read - R/clr */ +#define E1000_EITR(_n) (0x01680 + (0x4 * (_n))) +#define E1000_EICS 0x01520 /* Ext. Interrupt Cause Set - W0 */ +#define E1000_EIMS 0x01524 /* Ext. Interrupt Mask Set/Read - RW */ +#define E1000_EIMC 0x01528 /* Ext. Interrupt Mask Clear - WO */ +#define E1000_EIAC 0x0152C /* Ext. Interrupt Auto Clear - RW */ +#define E1000_EIAM 0x01530 /* Ext. Interrupt Ack Auto Clear Mask - RW */ +#define E1000_GPIE 0x01514 /* General Purpose Interrupt Enable - RW */ +#define E1000_IVAR0 0x01700 /* Interrupt Vector Allocation (array) - RW */ +#define E1000_IVAR_MISC 0x01740 /* IVAR for "other" causes - RW */ +#define E1000_TCTL 0x00400 /* Tx Control - RW */ +#define E1000_TCTL_EXT 0x00404 /* Extended Tx Control - RW */ +#define E1000_TIPG 0x00410 /* Tx Inter-packet gap -RW */ +#define E1000_AIT 0x00458 /* Adaptive Interframe Spacing Throttle - RW */ +#define E1000_LEDCTL 0x00E00 /* LED Control - RW */ +#define E1000_LEDMUX 0x08130 /* LED MUX Control */ +#define E1000_EXTCNF_CTRL 0x00F00 /* Extended Configuration Control */ +#define E1000_EXTCNF_SIZE 0x00F08 /* Extended Configuration Size */ +#define E1000_PHY_CTRL 0x00F10 /* PHY Control Register in CSR */ +#define E1000_PBA 0x01000 /* Packet Buffer Allocation - RW */ +#define E1000_PBS 0x01008 /* Packet Buffer Size */ +#define E1000_EEMNGCTL 0x01010 /* MNG EEprom Control */ +#define E1000_EEARBC 0x01024 /* EEPROM Auto Read Bus Control */ +#define E1000_EEWR 0x0102C /* EEPROM Write Register - RW */ +#define E1000_FLOP 0x0103C /* FLASH Opcode Register */ +#define E1000_I2CCMD 0x01028 /* SFPI2C Command Register - RW */ +#define E1000_I2CPARAMS 0x0102C /* SFPI2C Parameters Register - RW */ +#define E1000_I2CBB_EN 0x00000100 /* I2C - Bit Bang Enable */ +#define E1000_I2C_CLK_OUT 0x00000200 /* I2C- Clock */ +#define E1000_I2C_DATA_OUT 0x00000400 /* I2C- Data Out */ +#define E1000_I2C_DATA_OE_N 0x00000800 /* I2C- Data Output Enable */ +#define E1000_I2C_DATA_IN 0x00001000 /* I2C- Data In */ +#define E1000_I2C_CLK_OE_N 0x00002000 /* I2C- Clock Output Enable */ +#define E1000_I2C_CLK_IN 0x00004000 /* I2C- Clock In */ +#define E1000_I2C_CLK_STRETCH_DIS 0x00008000 /* I2C- Dis Clk Stretching */ +#define E1000_WDSTP 0x01040 /* Watchdog Setup - RW */ +#define E1000_SWDSTS 0x01044 /* SW Device Status - RW */ +#define E1000_FRTIMER 0x01048 /* Free Running Timer - RW */ +#define E1000_TCPTIMER 0x0104C /* TCP Timer - RW */ +#define E1000_VPDDIAG 0x01060 /* VPD Diagnostic - RO */ +#define E1000_ICR_V2 0x01500 /* Intr Cause - new location - RC */ +#define E1000_ICS_V2 0x01504 /* Intr Cause Set - new location - WO */ +#define E1000_IMS_V2 0x01508 /* Intr Mask Set/Read - new location - RW */ +#define E1000_IMC_V2 0x0150C /* Intr Mask Clear - new location - WO */ +#define E1000_IAM_V2 0x01510 /* Intr Ack Auto Mask - new location - RW */ +#define E1000_ERT 0x02008 /* Early Rx Threshold - RW */ +#define E1000_FCRTL 0x02160 /* Flow Control Receive Threshold Low - RW */ +#define E1000_FCRTH 0x02168 /* Flow Control Receive Threshold High - RW */ +#define E1000_PSRCTL 0x02170 /* Packet Split Receive Control - RW */ +#define E1000_RDFH 0x02410 /* Rx Data FIFO Head - RW */ +#define E1000_RDFT 0x02418 /* Rx Data FIFO Tail - RW */ +#define E1000_RDFHS 0x02420 /* Rx Data FIFO Head Saved - RW */ +#define E1000_RDFTS 0x02428 /* Rx Data FIFO Tail Saved - RW */ +#define E1000_RDFPC 0x02430 /* Rx Data FIFO Packet Count - RW */ +#define E1000_PBRTH 0x02458 /* PB Rx Arbitration Threshold - RW */ +#define E1000_FCRTV 0x02460 /* Flow Control Refresh Timer Value - RW */ +/* Split and Replication Rx Control - RW */ +#define E1000_RDPUMB 0x025CC /* DMA Rx Descriptor uC Mailbox - RW */ +#define E1000_RDPUAD 0x025D0 /* DMA Rx Descriptor uC Addr Command - RW */ +#define E1000_RDPUWD 0x025D4 /* DMA Rx Descriptor uC Data Write - RW */ +#define E1000_RDPURD 0x025D8 /* DMA Rx Descriptor uC Data Read - RW */ +#define E1000_RDPUCTL 0x025DC /* DMA Rx Descriptor uC Control - RW */ +#define E1000_PBDIAG 0x02458 /* Packet Buffer Diagnostic - RW */ +#define E1000_RXPBS 0x02404 /* Rx Packet Buffer Size - RW */ +#define E1000_IRPBS 0x02404 /* Same as RXPBS, renamed for newer Si - RW */ +#define E1000_PBRWAC 0x024E8 /* Rx packet buffer wrap around counter - RO */ +#define E1000_RDTR 0x02820 /* Rx Delay Timer - RW */ +#define E1000_RADV 0x0282C /* Rx Interrupt Absolute Delay Timer - RW */ +#define E1000_EMIADD 0x10 /* Extended Memory Indirect Address */ +#define E1000_EMIDATA 0x11 /* Extended Memory Indirect Data */ +#define E1000_SRWR 0x12018 /* Shadow Ram Write Register - RW */ +#define E1000_I210_FLMNGCTL 0x12038 +#define E1000_I210_FLMNGDATA 0x1203C +#define E1000_I210_FLMNGCNT 0x12040 + +#define E1000_I210_FLSWCTL 0x12048 +#define E1000_I210_FLSWDATA 0x1204C +#define E1000_I210_FLSWCNT 0x12050 + +#define E1000_I210_FLA 0x1201C + +#define E1000_INVM_DATA_REG(_n) (0x12120 + 4*(_n)) +#define E1000_INVM_SIZE 64 /* Number of INVM Data Registers */ + +/* QAV Tx mode control register */ +#define E1000_I210_TQAVCTRL 0x3570 + +/* QAV Tx mode control register bitfields masks */ +/* QAV enable */ +#define E1000_TQAVCTRL_MODE (1 << 0) +/* Fetching arbitration type */ +#define E1000_TQAVCTRL_FETCH_ARB (1 << 4) +/* Fetching timer enable */ +#define E1000_TQAVCTRL_FETCH_TIMER_ENABLE (1 << 5) +/* Launch arbitration type */ +#define E1000_TQAVCTRL_LAUNCH_ARB (1 << 8) +/* Launch timer enable */ +#define E1000_TQAVCTRL_LAUNCH_TIMER_ENABLE (1 << 9) +/* SP waits for SR enable */ +#define E1000_TQAVCTRL_SP_WAIT_SR (1 << 10) +/* Fetching timer correction */ +#define E1000_TQAVCTRL_FETCH_TIMER_DELTA_OFFSET 16 +#define E1000_TQAVCTRL_FETCH_TIMER_DELTA \ + (0xFFFF << E1000_TQAVCTRL_FETCH_TIMER_DELTA_OFFSET) + +/* High credit registers where _n can be 0 or 1. */ +#define E1000_I210_TQAVHC(_n) (0x300C + 0x40 * (_n)) + +/* Queues fetch arbitration priority control register */ +#define E1000_I210_TQAVARBCTRL 0x3574 +/* Queues priority masks where _n and _p can be 0-3. */ +#define E1000_TQAVARBCTRL_QUEUE_PRI(_n, _p) ((_p) << (2 * _n)) +/* QAV Tx mode control registers where _n can be 0 or 1. */ +#define E1000_I210_TQAVCC(_n) (0x3004 + 0x40 * (_n)) + +/* QAV Tx mode control register bitfields masks */ +#define E1000_TQAVCC_IDLE_SLOPE 0xFFFF /* Idle slope */ +#define E1000_TQAVCC_KEEP_CREDITS (1 << 30) /* Keep credits opt enable */ +#define E1000_TQAVCC_QUEUE_MODE (1 << 31) /* SP vs. SR Tx mode */ + +/* Good transmitted packets counter registers */ +#define E1000_PQGPTC(_n) (0x010014 + (0x100 * (_n))) + +/* Queues packet buffer size masks where _n can be 0-3 and _s 0-63 [kB] */ +#define E1000_I210_TXPBS_SIZE(_n, _s) ((_s) << (6 * _n)) + +#define E1000_MMDAC 13 /* MMD Access Control */ +#define E1000_MMDAAD 14 /* MMD Access Address/Data */ + +/* Convenience macros + * + * Note: "_n" is the queue number of the register to be written to. + * + * Example usage: + * E1000_RDBAL_REG(current_rx_queue) + */ +#define E1000_RDBAL(_n) ((_n) < 4 ? (0x02800 + ((_n) * 0x100)) : \ + (0x0C000 + ((_n) * 0x40))) +#define E1000_RDBAH(_n) ((_n) < 4 ? (0x02804 + ((_n) * 0x100)) : \ + (0x0C004 + ((_n) * 0x40))) +#define E1000_RDLEN(_n) ((_n) < 4 ? (0x02808 + ((_n) * 0x100)) : \ + (0x0C008 + ((_n) * 0x40))) +#define E1000_SRRCTL(_n) ((_n) < 4 ? (0x0280C + ((_n) * 0x100)) : \ + (0x0C00C + ((_n) * 0x40))) +#define E1000_RDH(_n) ((_n) < 4 ? (0x02810 + ((_n) * 0x100)) : \ + (0x0C010 + ((_n) * 0x40))) +#define E1000_RXCTL(_n) ((_n) < 4 ? (0x02814 + ((_n) * 0x100)) : \ + (0x0C014 + ((_n) * 0x40))) +#define E1000_DCA_RXCTRL(_n) E1000_RXCTL(_n) +#define E1000_RDT(_n) ((_n) < 4 ? (0x02818 + ((_n) * 0x100)) : \ + (0x0C018 + ((_n) * 0x40))) +#define E1000_RXDCTL(_n) ((_n) < 4 ? (0x02828 + ((_n) * 0x100)) : \ + (0x0C028 + ((_n) * 0x40))) +#define E1000_RQDPC(_n) ((_n) < 4 ? (0x02830 + ((_n) * 0x100)) : \ + (0x0C030 + ((_n) * 0x40))) +#define E1000_TDBAL(_n) ((_n) < 4 ? (0x03800 + ((_n) * 0x100)) : \ + (0x0E000 + ((_n) * 0x40))) +#define E1000_TDBAH(_n) ((_n) < 4 ? (0x03804 + ((_n) * 0x100)) : \ + (0x0E004 + ((_n) * 0x40))) +#define E1000_TDLEN(_n) ((_n) < 4 ? (0x03808 + ((_n) * 0x100)) : \ + (0x0E008 + ((_n) * 0x40))) +#define E1000_TDH(_n) ((_n) < 4 ? (0x03810 + ((_n) * 0x100)) : \ + (0x0E010 + ((_n) * 0x40))) +#define E1000_TXCTL(_n) ((_n) < 4 ? (0x03814 + ((_n) * 0x100)) : \ + (0x0E014 + ((_n) * 0x40))) +#define E1000_DCA_TXCTRL(_n) E1000_TXCTL(_n) +#define E1000_TDT(_n) ((_n) < 4 ? (0x03818 + ((_n) * 0x100)) : \ + (0x0E018 + ((_n) * 0x40))) +#define E1000_TXDCTL(_n) ((_n) < 4 ? (0x03828 + ((_n) * 0x100)) : \ + (0x0E028 + ((_n) * 0x40))) +#define E1000_TDWBAL(_n) ((_n) < 4 ? (0x03838 + ((_n) * 0x100)) : \ + (0x0E038 + ((_n) * 0x40))) +#define E1000_TDWBAH(_n) ((_n) < 4 ? (0x0383C + ((_n) * 0x100)) : \ + (0x0E03C + ((_n) * 0x40))) +#define E1000_TARC(_n) (0x03840 + ((_n) * 0x100)) +#define E1000_RSRPD 0x02C00 /* Rx Small Packet Detect - RW */ +#define E1000_RAID 0x02C08 /* Receive Ack Interrupt Delay - RW */ +#define E1000_KABGTXD 0x03004 /* AFE Band Gap Transmit Ref Data */ +#define E1000_PSRTYPE(_i) (0x05480 + ((_i) * 4)) +#define E1000_RAL(_i) (((_i) <= 15) ? (0x05400 + ((_i) * 8)) : \ + (0x054E0 + ((_i - 16) * 8))) +#define E1000_RAH(_i) (((_i) <= 15) ? (0x05404 + ((_i) * 8)) : \ + (0x054E4 + ((_i - 16) * 8))) +#define E1000_SHRAL(_i) (0x05438 + ((_i) * 8)) +#define E1000_SHRAH(_i) (0x0543C + ((_i) * 8)) +#define E1000_IP4AT_REG(_i) (0x05840 + ((_i) * 8)) +#define E1000_IP6AT_REG(_i) (0x05880 + ((_i) * 4)) +#define E1000_WUPM_REG(_i) (0x05A00 + ((_i) * 4)) +#define E1000_FFMT_REG(_i) (0x09000 + ((_i) * 8)) +#define E1000_FFVT_REG(_i) (0x09800 + ((_i) * 8)) +#define E1000_FFLT_REG(_i) (0x05F00 + ((_i) * 8)) +#define E1000_PBSLAC 0x03100 /* Pkt Buffer Slave Access Control */ +#define E1000_PBSLAD(_n) (0x03110 + (0x4 * (_n))) /* Pkt Buffer DWORD */ +#define E1000_TXPBS 0x03404 /* Tx Packet Buffer Size - RW */ +/* Same as TXPBS, renamed for newer Si - RW */ +#define E1000_ITPBS 0x03404 +#define E1000_TDFH 0x03410 /* Tx Data FIFO Head - RW */ +#define E1000_TDFT 0x03418 /* Tx Data FIFO Tail - RW */ +#define E1000_TDFHS 0x03420 /* Tx Data FIFO Head Saved - RW */ +#define E1000_TDFTS 0x03428 /* Tx Data FIFO Tail Saved - RW */ +#define E1000_TDFPC 0x03430 /* Tx Data FIFO Packet Count - RW */ +#define E1000_TDPUMB 0x0357C /* DMA Tx Desc uC Mail Box - RW */ +#define E1000_TDPUAD 0x03580 /* DMA Tx Desc uC Addr Command - RW */ +#define E1000_TDPUWD 0x03584 /* DMA Tx Desc uC Data Write - RW */ +#define E1000_TDPURD 0x03588 /* DMA Tx Desc uC Data Read - RW */ +#define E1000_TDPUCTL 0x0358C /* DMA Tx Desc uC Control - RW */ +#define E1000_DTXCTL 0x03590 /* DMA Tx Control - RW */ +#define E1000_DTXTCPFLGL 0x0359C /* DMA Tx Control flag low - RW */ +#define E1000_DTXTCPFLGH 0x035A0 /* DMA Tx Control flag high - RW */ +/* DMA Tx Max Total Allow Size Reqs - RW */ +#define E1000_DTXMXSZRQ 0x03540 +#define E1000_TIDV 0x03820 /* Tx Interrupt Delay Value - RW */ +#define E1000_TADV 0x0382C /* Tx Interrupt Absolute Delay Val - RW */ +#define E1000_CRCERRS 0x04000 /* CRC Error Count - R/clr */ +#define E1000_ALGNERRC 0x04004 /* Alignment Error Count - R/clr */ +#define E1000_SYMERRS 0x04008 /* Symbol Error Count - R/clr */ +#define E1000_RXERRC 0x0400C /* Receive Error Count - R/clr */ +#define E1000_MPC 0x04010 /* Missed Packet Count - R/clr */ +#define E1000_SCC 0x04014 /* Single Collision Count - R/clr */ +#define E1000_ECOL 0x04018 /* Excessive Collision Count - R/clr */ +#define E1000_MCC 0x0401C /* Multiple Collision Count - R/clr */ +#define E1000_LATECOL 0x04020 /* Late Collision Count - R/clr */ +#define E1000_COLC 0x04028 /* Collision Count - R/clr */ +#define E1000_DC 0x04030 /* Defer Count - R/clr */ +#define E1000_TNCRS 0x04034 /* Tx-No CRS - R/clr */ +#define E1000_SEC 0x04038 /* Sequence Error Count - R/clr */ +#define E1000_CEXTERR 0x0403C /* Carrier Extension Error Count - R/clr */ +#define E1000_RLEC 0x04040 /* Receive Length Error Count - R/clr */ +#define E1000_XONRXC 0x04048 /* XON Rx Count - R/clr */ +#define E1000_XONTXC 0x0404C /* XON Tx Count - R/clr */ +#define E1000_XOFFRXC 0x04050 /* XOFF Rx Count - R/clr */ +#define E1000_XOFFTXC 0x04054 /* XOFF Tx Count - R/clr */ +#define E1000_FCRUC 0x04058 /* Flow Control Rx Unsupported Count- R/clr */ +#define E1000_PRC64 0x0405C /* Packets Rx (64 bytes) - R/clr */ +#define E1000_PRC127 0x04060 /* Packets Rx (65-127 bytes) - R/clr */ +#define E1000_PRC255 0x04064 /* Packets Rx (128-255 bytes) - R/clr */ +#define E1000_PRC511 0x04068 /* Packets Rx (255-511 bytes) - R/clr */ +#define E1000_PRC1023 0x0406C /* Packets Rx (512-1023 bytes) - R/clr */ +#define E1000_PRC1522 0x04070 /* Packets Rx (1024-1522 bytes) - R/clr */ +#define E1000_GPRC 0x04074 /* Good Packets Rx Count - R/clr */ +#define E1000_BPRC 0x04078 /* Broadcast Packets Rx Count - R/clr */ +#define E1000_MPRC 0x0407C /* Multicast Packets Rx Count - R/clr */ +#define E1000_GPTC 0x04080 /* Good Packets Tx Count - R/clr */ +#define E1000_GORCL 0x04088 /* Good Octets Rx Count Low - R/clr */ +#define E1000_GORCH 0x0408C /* Good Octets Rx Count High - R/clr */ +#define E1000_GOTCL 0x04090 /* Good Octets Tx Count Low - R/clr */ +#define E1000_GOTCH 0x04094 /* Good Octets Tx Count High - R/clr */ +#define E1000_RNBC 0x040A0 /* Rx No Buffers Count - R/clr */ +#define E1000_RUC 0x040A4 /* Rx Undersize Count - R/clr */ +#define E1000_RFC 0x040A8 /* Rx Fragment Count - R/clr */ +#define E1000_ROC 0x040AC /* Rx Oversize Count - R/clr */ +#define E1000_RJC 0x040B0 /* Rx Jabber Count - R/clr */ +#define E1000_MGTPRC 0x040B4 /* Management Packets Rx Count - R/clr */ +#define E1000_MGTPDC 0x040B8 /* Management Packets Dropped Count - R/clr */ +#define E1000_MGTPTC 0x040BC /* Management Packets Tx Count - R/clr */ +#define E1000_TORL 0x040C0 /* Total Octets Rx Low - R/clr */ +#define E1000_TORH 0x040C4 /* Total Octets Rx High - R/clr */ +#define E1000_TOTL 0x040C8 /* Total Octets Tx Low - R/clr */ +#define E1000_TOTH 0x040CC /* Total Octets Tx High - R/clr */ +#define E1000_TPR 0x040D0 /* Total Packets Rx - R/clr */ +#define E1000_TPT 0x040D4 /* Total Packets Tx - R/clr */ +#define E1000_PTC64 0x040D8 /* Packets Tx (64 bytes) - R/clr */ +#define E1000_PTC127 0x040DC /* Packets Tx (65-127 bytes) - R/clr */ +#define E1000_PTC255 0x040E0 /* Packets Tx (128-255 bytes) - R/clr */ +#define E1000_PTC511 0x040E4 /* Packets Tx (256-511 bytes) - R/clr */ +#define E1000_PTC1023 0x040E8 /* Packets Tx (512-1023 bytes) - R/clr */ +#define E1000_PTC1522 0x040EC /* Packets Tx (1024-1522 Bytes) - R/clr */ +#define E1000_MPTC 0x040F0 /* Multicast Packets Tx Count - R/clr */ +#define E1000_BPTC 0x040F4 /* Broadcast Packets Tx Count - R/clr */ +#define E1000_TSCTC 0x040F8 /* TCP Segmentation Context Tx - R/clr */ +#define E1000_TSCTFC 0x040FC /* TCP Segmentation Context Tx Fail - R/clr */ +#define E1000_IAC 0x04100 /* Interrupt Assertion Count */ +#define E1000_ICRXPTC 0x04104 /* Interrupt Cause Rx Pkt Timer Expire Count */ +#define E1000_ICRXATC 0x04108 /* Interrupt Cause Rx Abs Timer Expire Count */ +#define E1000_ICTXPTC 0x0410C /* Interrupt Cause Tx Pkt Timer Expire Count */ +#define E1000_ICTXATC 0x04110 /* Interrupt Cause Tx Abs Timer Expire Count */ +#define E1000_ICTXQEC 0x04118 /* Interrupt Cause Tx Queue Empty Count */ +#define E1000_ICTXQMTC 0x0411C /* Interrupt Cause Tx Queue Min Thresh Count */ +#define E1000_ICRXDMTC 0x04120 /* Interrupt Cause Rx Desc Min Thresh Count */ +#define E1000_ICRXOC 0x04124 /* Interrupt Cause Receiver Overrun Count */ + +/* Virtualization statistical counters */ +#define E1000_PFVFGPRC(_n) (0x010010 + (0x100 * (_n))) +#define E1000_PFVFGPTC(_n) (0x010014 + (0x100 * (_n))) +#define E1000_PFVFGORC(_n) (0x010018 + (0x100 * (_n))) +#define E1000_PFVFGOTC(_n) (0x010034 + (0x100 * (_n))) +#define E1000_PFVFMPRC(_n) (0x010038 + (0x100 * (_n))) +#define E1000_PFVFGPRLBC(_n) (0x010040 + (0x100 * (_n))) +#define E1000_PFVFGPTLBC(_n) (0x010044 + (0x100 * (_n))) +#define E1000_PFVFGORLBC(_n) (0x010048 + (0x100 * (_n))) +#define E1000_PFVFGOTLBC(_n) (0x010050 + (0x100 * (_n))) + +/* LinkSec */ +#define E1000_LSECTXUT 0x04300 /* Tx Untagged Pkt Cnt */ +#define E1000_LSECTXPKTE 0x04304 /* Encrypted Tx Pkts Cnt */ +#define E1000_LSECTXPKTP 0x04308 /* Protected Tx Pkt Cnt */ +#define E1000_LSECTXOCTE 0x0430C /* Encrypted Tx Octets Cnt */ +#define E1000_LSECTXOCTP 0x04310 /* Protected Tx Octets Cnt */ +#define E1000_LSECRXUT 0x04314 /* Untagged non-Strict Rx Pkt Cnt */ +#define E1000_LSECRXOCTD 0x0431C /* Rx Octets Decrypted Count */ +#define E1000_LSECRXOCTV 0x04320 /* Rx Octets Validated */ +#define E1000_LSECRXBAD 0x04324 /* Rx Bad Tag */ +#define E1000_LSECRXNOSCI 0x04328 /* Rx Packet No SCI Count */ +#define E1000_LSECRXUNSCI 0x0432C /* Rx Packet Unknown SCI Count */ +#define E1000_LSECRXUNCH 0x04330 /* Rx Unchecked Packets Count */ +#define E1000_LSECRXDELAY 0x04340 /* Rx Delayed Packet Count */ +#define E1000_LSECRXLATE 0x04350 /* Rx Late Packets Count */ +#define E1000_LSECRXOK(_n) (0x04360 + (0x04 * (_n))) /* Rx Pkt OK Cnt */ +#define E1000_LSECRXINV(_n) (0x04380 + (0x04 * (_n))) /* Rx Invalid Cnt */ +#define E1000_LSECRXNV(_n) (0x043A0 + (0x04 * (_n))) /* Rx Not Valid Cnt */ +#define E1000_LSECRXUNSA 0x043C0 /* Rx Unused SA Count */ +#define E1000_LSECRXNUSA 0x043D0 /* Rx Not Using SA Count */ +#define E1000_LSECTXCAP 0x0B000 /* Tx Capabilities Register - RO */ +#define E1000_LSECRXCAP 0x0B300 /* Rx Capabilities Register - RO */ +#define E1000_LSECTXCTRL 0x0B004 /* Tx Control - RW */ +#define E1000_LSECRXCTRL 0x0B304 /* Rx Control - RW */ +#define E1000_LSECTXSCL 0x0B008 /* Tx SCI Low - RW */ +#define E1000_LSECTXSCH 0x0B00C /* Tx SCI High - RW */ +#define E1000_LSECTXSA 0x0B010 /* Tx SA0 - RW */ +#define E1000_LSECTXPN0 0x0B018 /* Tx SA PN 0 - RW */ +#define E1000_LSECTXPN1 0x0B01C /* Tx SA PN 1 - RW */ +#define E1000_LSECRXSCL 0x0B3D0 /* Rx SCI Low - RW */ +#define E1000_LSECRXSCH 0x0B3E0 /* Rx SCI High - RW */ +/* LinkSec Tx 128-bit Key 0 - WO */ +#define E1000_LSECTXKEY0(_n) (0x0B020 + (0x04 * (_n))) +/* LinkSec Tx 128-bit Key 1 - WO */ +#define E1000_LSECTXKEY1(_n) (0x0B030 + (0x04 * (_n))) +#define E1000_LSECRXSA(_n) (0x0B310 + (0x04 * (_n))) /* Rx SAs - RW */ +#define E1000_LSECRXPN(_n) (0x0B330 + (0x04 * (_n))) /* Rx SAs - RW */ +/* LinkSec Rx Keys - where _n is the SA no. and _m the 4 dwords of the 128 bit + * key - RW. + */ +#define E1000_LSECRXKEY(_n, _m) (0x0B350 + (0x10 * (_n)) + (0x04 * (_m))) + +#define E1000_SSVPC 0x041A0 /* Switch Security Violation Pkt Cnt */ +#define E1000_IPSCTRL 0xB430 /* IpSec Control Register */ +#define E1000_IPSRXCMD 0x0B408 /* IPSec Rx Command Register - RW */ +#define E1000_IPSRXIDX 0x0B400 /* IPSec Rx Index - RW */ +/* IPSec Rx IPv4/v6 Address - RW */ +#define E1000_IPSRXIPADDR(_n) (0x0B420 + (0x04 * (_n))) +/* IPSec Rx 128-bit Key - RW */ +#define E1000_IPSRXKEY(_n) (0x0B410 + (0x04 * (_n))) +#define E1000_IPSRXSALT 0x0B404 /* IPSec Rx Salt - RW */ +#define E1000_IPSRXSPI 0x0B40C /* IPSec Rx SPI - RW */ +/* IPSec Tx 128-bit Key - RW */ +#define E1000_IPSTXKEY(_n) (0x0B460 + (0x04 * (_n))) +#define E1000_IPSTXSALT 0x0B454 /* IPSec Tx Salt - RW */ +#define E1000_IPSTXIDX 0x0B450 /* IPSec Tx SA IDX - RW */ +#define E1000_PCS_CFG0 0x04200 /* PCS Configuration 0 - RW */ +#define E1000_PCS_LCTL 0x04208 /* PCS Link Control - RW */ +#define E1000_PCS_LSTAT 0x0420C /* PCS Link Status - RO */ +#define E1000_CBTMPC 0x0402C /* Circuit Breaker Tx Packet Count */ +#define E1000_HTDPMC 0x0403C /* Host Transmit Discarded Packets */ +#define E1000_CBRDPC 0x04044 /* Circuit Breaker Rx Dropped Count */ +#define E1000_CBRMPC 0x040FC /* Circuit Breaker Rx Packet Count */ +#define E1000_RPTHC 0x04104 /* Rx Packets To Host */ +#define E1000_HGPTC 0x04118 /* Host Good Packets Tx Count */ +#define E1000_HTCBDPC 0x04124 /* Host Tx Circuit Breaker Dropped Count */ +#define E1000_HGORCL 0x04128 /* Host Good Octets Received Count Low */ +#define E1000_HGORCH 0x0412C /* Host Good Octets Received Count High */ +#define E1000_HGOTCL 0x04130 /* Host Good Octets Transmit Count Low */ +#define E1000_HGOTCH 0x04134 /* Host Good Octets Transmit Count High */ +#define E1000_LENERRS 0x04138 /* Length Errors Count */ +#define E1000_SCVPC 0x04228 /* SerDes/SGMII Code Violation Pkt Count */ +#define E1000_HRMPC 0x0A018 /* Header Redirection Missed Packet Count */ +#define E1000_PCS_ANADV 0x04218 /* AN advertisement - RW */ +#define E1000_PCS_LPAB 0x0421C /* Link Partner Ability - RW */ +#define E1000_PCS_NPTX 0x04220 /* AN Next Page Transmit - RW */ +#define E1000_PCS_LPABNP 0x04224 /* Link Partner Ability Next Pg - RW */ +#define E1000_RXCSUM 0x05000 /* Rx Checksum Control - RW */ +#define E1000_RLPML 0x05004 /* Rx Long Packet Max Length */ +#define E1000_RFCTL 0x05008 /* Receive Filter Control*/ +#define E1000_MTA 0x05200 /* Multicast Table Array - RW Array */ +#define E1000_RA 0x05400 /* Receive Address - RW Array */ +#define E1000_RA2 0x054E0 /* 2nd half of Rx address array - RW Array */ +#define E1000_VFTA 0x05600 /* VLAN Filter Table Array - RW Array */ +#define E1000_VT_CTL 0x0581C /* VMDq Control - RW */ +#define E1000_CIAA 0x05B88 /* Config Indirect Access Address - RW */ +#define E1000_CIAD 0x05B8C /* Config Indirect Access Data - RW */ +#define E1000_VFQA0 0x0B000 /* VLAN Filter Queue Array 0 - RW Array */ +#define E1000_VFQA1 0x0B200 /* VLAN Filter Queue Array 1 - RW Array */ +#define E1000_WUC 0x05800 /* Wakeup Control - RW */ +#define E1000_WUFC 0x05808 /* Wakeup Filter Control - RW */ +#define E1000_WUS 0x05810 /* Wakeup Status - RO */ +#define E1000_MANC 0x05820 /* Management Control - RW */ +#define E1000_IPAV 0x05838 /* IP Address Valid - RW */ +#define E1000_IP4AT 0x05840 /* IPv4 Address Table - RW Array */ +#define E1000_IP6AT 0x05880 /* IPv6 Address Table - RW Array */ +#define E1000_WUPL 0x05900 /* Wakeup Packet Length - RW */ +#define E1000_WUPM 0x05A00 /* Wakeup Packet Memory - RO A */ +#define E1000_PBACL 0x05B68 /* MSIx PBA Clear - Read/Write 1's to clear */ +#define E1000_FFLT 0x05F00 /* Flexible Filter Length Table - RW Array */ +#define E1000_HOST_IF 0x08800 /* Host Interface */ +#define E1000_FFMT 0x09000 /* Flexible Filter Mask Table - RW Array */ +#define E1000_FFVT 0x09800 /* Flexible Filter Value Table - RW Array */ +#define E1000_HIBBA 0x8F40 /* Host Interface Buffer Base Address */ +/* Flexible Host Filter Table */ +#define E1000_FHFT(_n) (0x09000 + ((_n) * 0x100)) +/* Ext Flexible Host Filter Table */ +#define E1000_FHFT_EXT(_n) (0x09A00 + ((_n) * 0x100)) + + +#define E1000_KMRNCTRLSTA 0x00034 /* MAC-PHY interface - RW */ +#define E1000_MANC2H 0x05860 /* Management Control To Host - RW */ +/* Management Decision Filters */ +#define E1000_MDEF(_n) (0x05890 + (4 * (_n))) +#define E1000_SW_FW_SYNC 0x05B5C /* SW-FW Synchronization - RW */ +#define E1000_CCMCTL 0x05B48 /* CCM Control Register */ +#define E1000_GIOCTL 0x05B44 /* GIO Analog Control Register */ +#define E1000_SCCTL 0x05B4C /* PCIc PLL Configuration Register */ +#define E1000_GCR 0x05B00 /* PCI-Ex Control */ +#define E1000_GCR2 0x05B64 /* PCI-Ex Control #2 */ +#define E1000_GSCL_1 0x05B10 /* PCI-Ex Statistic Control #1 */ +#define E1000_GSCL_2 0x05B14 /* PCI-Ex Statistic Control #2 */ +#define E1000_GSCL_3 0x05B18 /* PCI-Ex Statistic Control #3 */ +#define E1000_GSCL_4 0x05B1C /* PCI-Ex Statistic Control #4 */ +#define E1000_FACTPS 0x05B30 /* Function Active and Power State to MNG */ +#define E1000_SWSM 0x05B50 /* SW Semaphore */ +#define E1000_FWSM 0x05B54 /* FW Semaphore */ +/* Driver-only SW semaphore (not used by BOOT agents) */ +#define E1000_SWSM2 0x05B58 +#define E1000_DCA_ID 0x05B70 /* DCA Requester ID Information - RO */ +#define E1000_DCA_CTRL 0x05B74 /* DCA Control - RW */ +#define E1000_UFUSE 0x05B78 /* UFUSE - RO */ +#define E1000_FFLT_DBG 0x05F04 /* Debug Register */ +#define E1000_HICR 0x08F00 /* Host Interface Control */ +#define E1000_FWSTS 0x08F0C /* FW Status */ + +/* RSS registers */ +#define E1000_CPUVEC 0x02C10 /* CPU Vector Register - RW */ +#define E1000_MRQC 0x05818 /* Multiple Receive Control - RW */ +#define E1000_IMIR(_i) (0x05A80 + ((_i) * 4)) /* Immediate Interrupt */ +#define E1000_IMIREXT(_i) (0x05AA0 + ((_i) * 4)) /* Immediate INTR Ext*/ +#define E1000_IMIRVP 0x05AC0 /* Immediate INT Rx VLAN Priority -RW */ +#define E1000_MSIXBM(_i) (0x01600 + ((_i) * 4)) /* MSI-X Alloc Reg -RW */ +#define E1000_RETA(_i) (0x05C00 + ((_i) * 4)) /* Redirection Table - RW */ +#define E1000_RSSRK(_i) (0x05C80 + ((_i) * 4)) /* RSS Random Key - RW */ +#define E1000_RSSIM 0x05864 /* RSS Interrupt Mask */ +#define E1000_RSSIR 0x05868 /* RSS Interrupt Request */ +/* VT Registers */ +#define E1000_SWPBS 0x03004 /* Switch Packet Buffer Size - RW */ +#define E1000_MBVFICR 0x00C80 /* Mailbox VF Cause - RWC */ +#define E1000_MBVFIMR 0x00C84 /* Mailbox VF int Mask - RW */ +#define E1000_VFLRE 0x00C88 /* VF Register Events - RWC */ +#define E1000_VFRE 0x00C8C /* VF Receive Enables */ +#define E1000_VFTE 0x00C90 /* VF Transmit Enables */ +#define E1000_QDE 0x02408 /* Queue Drop Enable - RW */ +#define E1000_DTXSWC 0x03500 /* DMA Tx Switch Control - RW */ +#define E1000_WVBR 0x03554 /* VM Wrong Behavior - RWS */ +#define E1000_RPLOLR 0x05AF0 /* Replication Offload - RW */ +#define E1000_UTA 0x0A000 /* Unicast Table Array - RW */ +#define E1000_IOVTCL 0x05BBC /* IOV Control Register */ +#define E1000_VMRCTL 0X05D80 /* Virtual Mirror Rule Control */ +#define E1000_VMRVLAN 0x05D90 /* Virtual Mirror Rule VLAN */ +#define E1000_VMRVM 0x05DA0 /* Virtual Mirror Rule VM */ +#define E1000_MDFB 0x03558 /* Malicious Driver free block */ +#define E1000_LVMMC 0x03548 /* Last VM Misbehavior cause */ +#define E1000_TXSWC 0x05ACC /* Tx Switch Control */ +#define E1000_SCCRL 0x05DB0 /* Storm Control Control */ +#define E1000_BSCTRH 0x05DB8 /* Broadcast Storm Control Threshold */ +#define E1000_MSCTRH 0x05DBC /* Multicast Storm Control Threshold */ +/* These act per VF so an array friendly macro is used */ +#define E1000_V2PMAILBOX(_n) (0x00C40 + (4 * (_n))) +#define E1000_P2VMAILBOX(_n) (0x00C00 + (4 * (_n))) +#define E1000_VMBMEM(_n) (0x00800 + (64 * (_n))) +#define E1000_VFVMBMEM(_n) (0x00800 + (_n)) +#define E1000_VMOLR(_n) (0x05AD0 + (4 * (_n))) +/* VLAN Virtual Machine Filter - RW */ +#define E1000_VLVF(_n) (0x05D00 + (4 * (_n))) +#define E1000_VMVIR(_n) (0x03700 + (4 * (_n))) +#define E1000_DVMOLR(_n) (0x0C038 + (0x40 * (_n))) /* DMA VM offload */ +#define E1000_VTCTRL(_n) (0x10000 + (0x100 * (_n))) /* VT Control */ +#define E1000_TSYNCRXCTL 0x0B620 /* Rx Time Sync Control register - RW */ +#define E1000_TSYNCTXCTL 0x0B614 /* Tx Time Sync Control register - RW */ +#define E1000_TSYNCRXCFG 0x05F50 /* Time Sync Rx Configuration - RW */ +#define E1000_RXSTMPL 0x0B624 /* Rx timestamp Low - RO */ +#define E1000_RXSTMPH 0x0B628 /* Rx timestamp High - RO */ +#define E1000_RXSATRL 0x0B62C /* Rx timestamp attribute low - RO */ +#define E1000_RXSATRH 0x0B630 /* Rx timestamp attribute high - RO */ +#define E1000_TXSTMPL 0x0B618 /* Tx timestamp value Low - RO */ +#define E1000_TXSTMPH 0x0B61C /* Tx timestamp value High - RO */ +#define E1000_SYSTIML 0x0B600 /* System time register Low - RO */ +#define E1000_SYSTIMH 0x0B604 /* System time register High - RO */ +#define E1000_TIMINCA 0x0B608 /* Increment attributes register - RW */ +#define E1000_TIMADJL 0x0B60C /* Time sync time adjustment offset Low - RW */ +#define E1000_TIMADJH 0x0B610 /* Time sync time adjustment offset High - RW */ +#define E1000_TSAUXC 0x0B640 /* Timesync Auxiliary Control register */ +#define E1000_SYSTIMR 0x0B6F8 /* System time register Residue */ +#define E1000_TSICR 0x0B66C /* Interrupt Cause Register */ +#define E1000_TSIM 0x0B674 /* Interrupt Mask Register */ + +/* Filtering Registers */ +#define E1000_SAQF(_n) (0x05980 + (4 * (_n))) /* Source Address Queue Fltr */ +#define E1000_DAQF(_n) (0x059A0 + (4 * (_n))) /* Dest Address Queue Fltr */ +#define E1000_SPQF(_n) (0x059C0 + (4 * (_n))) /* Source Port Queue Fltr */ +#define E1000_FTQF(_n) (0x059E0 + (4 * (_n))) /* 5-tuple Queue Fltr */ +#define E1000_TTQF(_n) (0x059E0 + (4 * (_n))) /* 2-tuple Queue Fltr */ +#define E1000_SYNQF(_n) (0x055FC + (4 * (_n))) /* SYN Packet Queue Fltr */ +#define E1000_ETQF(_n) (0x05CB0 + (4 * (_n))) /* EType Queue Fltr */ + +#define E1000_RTTDCS 0x3600 /* Reedtown Tx Desc plane control and status */ +#define E1000_RTTPCS 0x3474 /* Reedtown Tx Packet Plane control and status */ +#define E1000_RTRPCS 0x2474 /* Rx packet plane control and status */ +#define E1000_RTRUP2TC 0x05AC4 /* Rx User Priority to Traffic Class */ +#define E1000_RTTUP2TC 0x0418 /* Transmit User Priority to Traffic Class */ +/* Tx Desc plane TC Rate-scheduler config */ +#define E1000_RTTDTCRC(_n) (0x3610 + ((_n) * 4)) +/* Tx Packet plane TC Rate-Scheduler Config */ +#define E1000_RTTPTCRC(_n) (0x3480 + ((_n) * 4)) +/* Rx Packet plane TC Rate-Scheduler Config */ +#define E1000_RTRPTCRC(_n) (0x2480 + ((_n) * 4)) +/* Tx Desc Plane TC Rate-Scheduler Status */ +#define E1000_RTTDTCRS(_n) (0x3630 + ((_n) * 4)) +/* Tx Desc Plane TC Rate-Scheduler MMW */ +#define E1000_RTTDTCRM(_n) (0x3650 + ((_n) * 4)) +/* Tx Packet plane TC Rate-Scheduler Status */ +#define E1000_RTTPTCRS(_n) (0x34A0 + ((_n) * 4)) +/* Tx Packet plane TC Rate-scheduler MMW */ +#define E1000_RTTPTCRM(_n) (0x34C0 + ((_n) * 4)) +/* Rx Packet plane TC Rate-Scheduler Status */ +#define E1000_RTRPTCRS(_n) (0x24A0 + ((_n) * 4)) +/* Rx Packet plane TC Rate-Scheduler MMW */ +#define E1000_RTRPTCRM(_n) (0x24C0 + ((_n) * 4)) +/* Tx Desc plane VM Rate-Scheduler MMW*/ +#define E1000_RTTDVMRM(_n) (0x3670 + ((_n) * 4)) +/* Tx BCN Rate-Scheduler MMW */ +#define E1000_RTTBCNRM(_n) (0x3690 + ((_n) * 4)) +#define E1000_RTTDQSEL 0x3604 /* Tx Desc Plane Queue Select */ +#define E1000_RTTDVMRC 0x3608 /* Tx Desc Plane VM Rate-Scheduler Config */ +#define E1000_RTTDVMRS 0x360C /* Tx Desc Plane VM Rate-Scheduler Status */ +#define E1000_RTTBCNRC 0x36B0 /* Tx BCN Rate-Scheduler Config */ +#define E1000_RTTBCNRS 0x36B4 /* Tx BCN Rate-Scheduler Status */ +#define E1000_RTTBCNCR 0xB200 /* Tx BCN Control Register */ +#define E1000_RTTBCNTG 0x35A4 /* Tx BCN Tagging */ +#define E1000_RTTBCNCP 0xB208 /* Tx BCN Congestion point */ +#define E1000_RTRBCNCR 0xB20C /* Rx BCN Control Register */ +#define E1000_RTTBCNRD 0x36B8 /* Tx BCN Rate Drift */ +#define E1000_PFCTOP 0x1080 /* Priority Flow Control Type and Opcode */ +#define E1000_RTTBCNIDX 0xB204 /* Tx BCN Congestion Point */ +#define E1000_RTTBCNACH 0x0B214 /* Tx BCN Control High */ +#define E1000_RTTBCNACL 0x0B210 /* Tx BCN Control Low */ + +/* DMA Coalescing registers */ +#define E1000_DMACR 0x02508 /* Control Register */ +#define E1000_DMCTXTH 0x03550 /* Transmit Threshold */ +#define E1000_DMCTLX 0x02514 /* Time to Lx Request */ +#define E1000_DMCRTRH 0x05DD0 /* Receive Packet Rate Threshold */ +#define E1000_DMCCNT 0x05DD4 /* Current Rx Count */ +#define E1000_FCRTC 0x02170 /* Flow Control Rx high watermark */ +#define E1000_PCIEMISC 0x05BB8 /* PCIE misc config register */ + +/* PCIe Parity Status Register */ +#define E1000_PCIEERRSTS 0x05BA8 + +#define E1000_PROXYS 0x5F64 /* Proxying Status */ +#define E1000_PROXYFC 0x5F60 /* Proxying Filter Control */ +/* Thermal sensor configuration and status registers */ +#define E1000_THMJT 0x08100 /* Junction Temperature */ +#define E1000_THLOWTC 0x08104 /* Low Threshold Control */ +#define E1000_THMIDTC 0x08108 /* Mid Threshold Control */ +#define E1000_THHIGHTC 0x0810C /* High Threshold Control */ +#define E1000_THSTAT 0x08110 /* Thermal Sensor Status */ + +/* Energy Efficient Ethernet "EEE" registers */ +#define E1000_IPCNFG 0x0E38 /* Internal PHY Configuration */ +#define E1000_LTRC 0x01A0 /* Latency Tolerance Reporting Control */ +#define E1000_EEER 0x0E30 /* Energy Efficient Ethernet "EEE"*/ +#define E1000_EEE_SU 0x0E34 /* EEE Setup */ +#define E1000_TLPIC 0x4148 /* EEE Tx LPI Count - TLPIC */ +#define E1000_RLPIC 0x414C /* EEE Rx LPI Count - RLPIC */ + +/* OS2BMC Registers */ +#define E1000_B2OSPC 0x08FE0 /* BMC2OS packets sent by BMC */ +#define E1000_B2OGPRC 0x04158 /* BMC2OS packets received by host */ +#define E1000_O2BGPTC 0x08FE4 /* OS2BMC packets received by BMC */ +#define E1000_O2BSPC 0x0415C /* OS2BMC packets transmitted by host */ + + + +#endif diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h new file mode 100644 index 00000000..e5554ca3 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb.h @@ -0,0 +1,859 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +/* Linux PRO/1000 Ethernet Driver main header file */ + +#ifndef _IGB_H_ +#define _IGB_H_ + +#include + +#ifndef IGB_NO_LRO +#include +#endif + +#undef HAVE_HW_TIME_STAMP +#ifdef HAVE_HW_TIME_STAMP +#include +#include +#include + +#endif +#ifdef SIOCETHTOOL +#include +#endif + +struct igb_adapter; + +#if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE) +//#define IGB_DCA +#endif +#ifdef IGB_DCA +#include +#endif + +#include "kcompat.h" + +#ifdef HAVE_SCTP +#include +#endif + +#include "e1000_api.h" +#include "e1000_82575.h" +#include "e1000_manage.h" +#include "e1000_mbx.h" + +#define IGB_ERR(args...) printk(KERN_ERR "igb: " args) + +#define PFX "igb: " +#define DPRINTK(nlevel, klevel, fmt, args...) \ + (void)((NETIF_MSG_##nlevel & adapter->msg_enable) && \ + printk(KERN_##klevel PFX "%s: %s: " fmt, adapter->netdev->name, \ + __FUNCTION__ , ## args)) + +#ifdef HAVE_PTP_1588_CLOCK +#include +#include +#include +#endif /* HAVE_PTP_1588_CLOCK */ + +#ifdef HAVE_I2C_SUPPORT +#include +#include +#endif /* HAVE_I2C_SUPPORT */ + +/* Interrupt defines */ +#define IGB_START_ITR 648 /* ~6000 ints/sec */ +#define IGB_4K_ITR 980 +#define IGB_20K_ITR 196 +#define IGB_70K_ITR 56 + +/* Interrupt modes, as used by the IntMode parameter */ +#define IGB_INT_MODE_LEGACY 0 +#define IGB_INT_MODE_MSI 1 +#define IGB_INT_MODE_MSIX 2 + +/* TX/RX descriptor defines */ +#define IGB_DEFAULT_TXD 256 +#define IGB_DEFAULT_TX_WORK 128 +#define IGB_MIN_TXD 80 +#define IGB_MAX_TXD 4096 + +#define IGB_DEFAULT_RXD 256 +#define IGB_MIN_RXD 80 +#define IGB_MAX_RXD 4096 + +#define IGB_MIN_ITR_USECS 10 /* 100k irq/sec */ +#define IGB_MAX_ITR_USECS 8191 /* 120 irq/sec */ + +#define NON_Q_VECTORS 1 +#define MAX_Q_VECTORS 10 + +/* Transmit and receive queues */ +#define IGB_MAX_RX_QUEUES 16 +#define IGB_MAX_TX_QUEUES 16 + +#define IGB_MAX_VF_MC_ENTRIES 30 +#define IGB_MAX_VF_FUNCTIONS 8 +#define IGB_82576_VF_DEV_ID 0x10CA +#define IGB_I350_VF_DEV_ID 0x1520 +#define IGB_MAX_UTA_ENTRIES 128 +#define MAX_EMULATION_MAC_ADDRS 16 +#define OUI_LEN 3 +#define IGB_MAX_VMDQ_QUEUES 8 + + +struct vf_data_storage { + unsigned char vf_mac_addresses[ETH_ALEN]; + u16 vf_mc_hashes[IGB_MAX_VF_MC_ENTRIES]; + u16 num_vf_mc_hashes; + u16 default_vf_vlan_id; + u16 vlans_enabled; + unsigned char em_mac_addresses[MAX_EMULATION_MAC_ADDRS * ETH_ALEN]; + u32 uta_table_copy[IGB_MAX_UTA_ENTRIES]; + u32 flags; + unsigned long last_nack; +#ifdef IFLA_VF_MAX + u16 pf_vlan; /* When set, guest VLAN config not allowed. */ + u16 pf_qos; + u16 tx_rate; +#ifdef HAVE_VF_SPOOFCHK_CONFIGURE + bool spoofchk_enabled; +#endif +#endif +}; + +#define IGB_VF_FLAG_CTS 0x00000001 /* VF is clear to send data */ +#define IGB_VF_FLAG_UNI_PROMISC 0x00000002 /* VF has unicast promisc */ +#define IGB_VF_FLAG_MULTI_PROMISC 0x00000004 /* VF has multicast promisc */ +#define IGB_VF_FLAG_PF_SET_MAC 0x00000008 /* PF has set MAC address */ + +/* RX descriptor control thresholds. + * PTHRESH - MAC will consider prefetch if it has fewer than this number of + * descriptors available in its onboard memory. + * Setting this to 0 disables RX descriptor prefetch. + * HTHRESH - MAC will only prefetch if there are at least this many descriptors + * available in host memory. + * If PTHRESH is 0, this should also be 0. + * WTHRESH - RX descriptor writeback threshold - MAC will delay writing back + * descriptors until either it has this many to write back, or the + * ITR timer expires. + */ +#define IGB_RX_PTHRESH ((hw->mac.type == e1000_i354) ? 12 : 8) +#define IGB_RX_HTHRESH 8 +#define IGB_TX_PTHRESH ((hw->mac.type == e1000_i354) ? 20 : 8) +#define IGB_TX_HTHRESH 1 +#define IGB_RX_WTHRESH ((hw->mac.type == e1000_82576 && \ + adapter->msix_entries) ? 1 : 4) + +/* this is the size past which hardware will drop packets when setting LPE=0 */ +#define MAXIMUM_ETHERNET_VLAN_SIZE 1522 + +/* NOTE: netdev_alloc_skb reserves 16 bytes, NET_IP_ALIGN means we + * reserve 2 more, and skb_shared_info adds an additional 384 more, + * this adds roughly 448 bytes of extra data meaning the smallest + * allocation we could have is 1K. + * i.e. RXBUFFER_512 --> size-1024 slab + */ +/* Supported Rx Buffer Sizes */ +#define IGB_RXBUFFER_256 256 +#define IGB_RXBUFFER_2048 2048 +#define IGB_RXBUFFER_16384 16384 +#define IGB_RX_HDR_LEN IGB_RXBUFFER_256 +#if MAX_SKB_FRAGS < 8 +#define IGB_RX_BUFSZ ALIGN(MAX_JUMBO_FRAME_SIZE / MAX_SKB_FRAGS, 1024) +#else +#define IGB_RX_BUFSZ IGB_RXBUFFER_2048 +#endif + + +/* Packet Buffer allocations */ +#define IGB_PBA_BYTES_SHIFT 0xA +#define IGB_TX_HEAD_ADDR_SHIFT 7 +#define IGB_PBA_TX_MASK 0xFFFF0000 + +#define IGB_FC_PAUSE_TIME 0x0680 /* 858 usec */ + +/* How many Rx Buffers do we bundle into one write to the hardware ? */ +#define IGB_RX_BUFFER_WRITE 16 /* Must be power of 2 */ + +#define IGB_EEPROM_APME 0x0400 +#define AUTO_ALL_MODES 0 + +#ifndef IGB_MASTER_SLAVE +/* Switch to override PHY master/slave setting */ +#define IGB_MASTER_SLAVE e1000_ms_hw_default +#endif + +#define IGB_MNG_VLAN_NONE -1 + +#ifndef IGB_NO_LRO +#define IGB_LRO_MAX 32 /*Maximum number of LRO descriptors*/ +struct igb_lro_stats { + u32 flushed; + u32 coal; +}; + +/* + * igb_lro_header - header format to be aggregated by LRO + * @iph: IP header without options + * @tcp: TCP header + * @ts: Optional TCP timestamp data in TCP options + * + * This structure relies on the check above that verifies that the header + * is IPv4 and does not contain any options. + */ +struct igb_lrohdr { + struct iphdr iph; + struct tcphdr th; + __be32 ts[0]; +}; + +struct igb_lro_list { + struct sk_buff_head active; + struct igb_lro_stats stats; +}; + +#endif /* IGB_NO_LRO */ +struct igb_cb { +#ifndef IGB_NO_LRO +#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT + union { /* Union defining head/tail partner */ + struct sk_buff *head; + struct sk_buff *tail; + }; +#endif + __be32 tsecr; /* timestamp echo response */ + u32 tsval; /* timestamp value in host order */ + u32 next_seq; /* next expected sequence number */ + u16 free; /* 65521 minus total size */ + u16 mss; /* size of data portion of packet */ + u16 append_cnt; /* number of skb's appended */ +#endif /* IGB_NO_LRO */ +#ifdef HAVE_VLAN_RX_REGISTER + u16 vid; /* VLAN tag */ +#endif +}; +#define IGB_CB(skb) ((struct igb_cb *)(skb)->cb) + +enum igb_tx_flags { + /* cmd_type flags */ + IGB_TX_FLAGS_VLAN = 0x01, + IGB_TX_FLAGS_TSO = 0x02, + IGB_TX_FLAGS_TSTAMP = 0x04, + + /* olinfo flags */ + IGB_TX_FLAGS_IPV4 = 0x10, + IGB_TX_FLAGS_CSUM = 0x20, +}; + +/* VLAN info */ +#define IGB_TX_FLAGS_VLAN_MASK 0xffff0000 +#define IGB_TX_FLAGS_VLAN_SHIFT 16 + +/* + * The largest size we can write to the descriptor is 65535. In order to + * maintain a power of two alignment we have to limit ourselves to 32K. + */ +#define IGB_MAX_TXD_PWR 15 +#define IGB_MAX_DATA_PER_TXD (1 << IGB_MAX_TXD_PWR) + +/* Tx Descriptors needed, worst case */ +#define TXD_USE_COUNT(S) DIV_ROUND_UP((S), IGB_MAX_DATA_PER_TXD) +#ifndef MAX_SKB_FRAGS +#define DESC_NEEDED 4 +#elif (MAX_SKB_FRAGS < 16) +#define DESC_NEEDED ((MAX_SKB_FRAGS * TXD_USE_COUNT(PAGE_SIZE)) + 4) +#else +#define DESC_NEEDED (MAX_SKB_FRAGS + 4) +#endif + +/* wrapper around a pointer to a socket buffer, + * so a DMA handle can be stored along with the buffer */ +struct igb_tx_buffer { + union e1000_adv_tx_desc *next_to_watch; + unsigned long time_stamp; + struct sk_buff *skb; + unsigned int bytecount; + u16 gso_segs; + __be16 protocol; + DEFINE_DMA_UNMAP_ADDR(dma); + DEFINE_DMA_UNMAP_LEN(len); + u32 tx_flags; +}; + +struct igb_rx_buffer { + dma_addr_t dma; +#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT + struct sk_buff *skb; +#else + struct page *page; + u32 page_offset; +#endif +}; + +struct igb_tx_queue_stats { + u64 packets; + u64 bytes; + u64 restart_queue; +}; + +struct igb_rx_queue_stats { + u64 packets; + u64 bytes; + u64 drops; + u64 csum_err; + u64 alloc_failed; + u64 ipv4_packets; /* IPv4 headers processed */ + u64 ipv4e_packets; /* IPv4E headers with extensions processed */ + u64 ipv6_packets; /* IPv6 headers processed */ + u64 ipv6e_packets; /* IPv6E headers with extensions processed */ + u64 tcp_packets; /* TCP headers processed */ + u64 udp_packets; /* UDP headers processed */ + u64 sctp_packets; /* SCTP headers processed */ + u64 nfs_packets; /* NFS headers processe */ +}; + +struct igb_ring_container { + struct igb_ring *ring; /* pointer to linked list of rings */ + unsigned int total_bytes; /* total bytes processed this int */ + unsigned int total_packets; /* total packets processed this int */ + u16 work_limit; /* total work allowed per interrupt */ + u8 count; /* total number of rings in vector */ + u8 itr; /* current ITR setting for ring */ +}; + +struct igb_ring { + struct igb_q_vector *q_vector; /* backlink to q_vector */ + struct net_device *netdev; /* back pointer to net_device */ + struct device *dev; /* device for dma mapping */ + union { /* array of buffer info structs */ + struct igb_tx_buffer *tx_buffer_info; + struct igb_rx_buffer *rx_buffer_info; + }; +#ifdef HAVE_PTP_1588_CLOCK + unsigned long last_rx_timestamp; +#endif /* HAVE_PTP_1588_CLOCK */ + void *desc; /* descriptor ring memory */ + unsigned long flags; /* ring specific flags */ + void __iomem *tail; /* pointer to ring tail register */ + dma_addr_t dma; /* phys address of the ring */ + unsigned int size; /* length of desc. ring in bytes */ + + u16 count; /* number of desc. in the ring */ + u8 queue_index; /* logical index of the ring*/ + u8 reg_idx; /* physical index of the ring */ + + /* everything past this point are written often */ + u16 next_to_clean; + u16 next_to_use; + u16 next_to_alloc; + + union { + /* TX */ + struct { + struct igb_tx_queue_stats tx_stats; + }; + /* RX */ + struct { + struct igb_rx_queue_stats rx_stats; +#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT + u16 rx_buffer_len; +#else + struct sk_buff *skb; +#endif + }; + }; +#ifdef CONFIG_IGB_VMDQ_NETDEV + struct net_device *vmdq_netdev; + int vqueue_index; /* queue index for virtual netdev */ +#endif +} ____cacheline_internodealigned_in_smp; + +struct igb_q_vector { + struct igb_adapter *adapter; /* backlink */ + int cpu; /* CPU for DCA */ + u32 eims_value; /* EIMS mask value */ + + u16 itr_val; + u8 set_itr; + void __iomem *itr_register; + + struct igb_ring_container rx, tx; + + struct napi_struct napi; +#ifndef IGB_NO_LRO + struct igb_lro_list lrolist; /* LRO list for queue vector*/ +#endif + char name[IFNAMSIZ + 9]; +#ifndef HAVE_NETDEV_NAPI_LIST + struct net_device poll_dev; +#endif + + /* for dynamic allocation of rings associated with this q_vector */ + struct igb_ring ring[0] ____cacheline_internodealigned_in_smp; +}; + +enum e1000_ring_flags_t { +#ifndef HAVE_NDO_SET_FEATURES + IGB_RING_FLAG_RX_CSUM, +#endif + IGB_RING_FLAG_RX_SCTP_CSUM, + IGB_RING_FLAG_RX_LB_VLAN_BSWAP, + IGB_RING_FLAG_TX_CTX_IDX, + IGB_RING_FLAG_TX_DETECT_HANG, +}; + +struct igb_mac_addr { + u8 addr[ETH_ALEN]; + u16 queue; + u16 state; /* bitmask */ +}; +#define IGB_MAC_STATE_DEFAULT 0x1 +#define IGB_MAC_STATE_MODIFIED 0x2 +#define IGB_MAC_STATE_IN_USE 0x4 + +#define IGB_TXD_DCMD (E1000_ADVTXD_DCMD_EOP | E1000_ADVTXD_DCMD_RS) + +#define IGB_RX_DESC(R, i) \ + (&(((union e1000_adv_rx_desc *)((R)->desc))[i])) +#define IGB_TX_DESC(R, i) \ + (&(((union e1000_adv_tx_desc *)((R)->desc))[i])) +#define IGB_TX_CTXTDESC(R, i) \ + (&(((struct e1000_adv_tx_context_desc *)((R)->desc))[i])) + +#ifdef CONFIG_IGB_VMDQ_NETDEV +#define netdev_ring(ring) \ + ((ring->vmdq_netdev ? ring->vmdq_netdev : ring->netdev)) +#define ring_queue_index(ring) \ + ((ring->vmdq_netdev ? ring->vqueue_index : ring->queue_index)) +#else +#define netdev_ring(ring) (ring->netdev) +#define ring_queue_index(ring) (ring->queue_index) +#endif /* CONFIG_IGB_VMDQ_NETDEV */ + +/* igb_test_staterr - tests bits within Rx descriptor status and error fields */ +static inline __le32 igb_test_staterr(union e1000_adv_rx_desc *rx_desc, + const u32 stat_err_bits) +{ + return rx_desc->wb.upper.status_error & cpu_to_le32(stat_err_bits); +} + +/* igb_desc_unused - calculate if we have unused descriptors */ +static inline u16 igb_desc_unused(const struct igb_ring *ring) +{ + u16 ntc = ring->next_to_clean; + u16 ntu = ring->next_to_use; + + return ((ntc > ntu) ? 0 : ring->count) + ntc - ntu - 1; +} + +#ifdef CONFIG_BQL +static inline struct netdev_queue *txring_txq(const struct igb_ring *tx_ring) +{ + return netdev_get_tx_queue(tx_ring->netdev, tx_ring->queue_index); +} +#endif /* CONFIG_BQL */ + +// #ifdef EXT_THERMAL_SENSOR_SUPPORT +// #ifdef IGB_PROCFS +struct igb_therm_proc_data +{ + struct e1000_hw *hw; + struct e1000_thermal_diode_data *sensor_data; +}; + +// #endif /* IGB_PROCFS */ +// #endif /* EXT_THERMAL_SENSOR_SUPPORT */ + +#ifdef IGB_HWMON +#define IGB_HWMON_TYPE_LOC 0 +#define IGB_HWMON_TYPE_TEMP 1 +#define IGB_HWMON_TYPE_CAUTION 2 +#define IGB_HWMON_TYPE_MAX 3 + +struct hwmon_attr { + struct device_attribute dev_attr; + struct e1000_hw *hw; + struct e1000_thermal_diode_data *sensor; + char name[12]; + }; + +struct hwmon_buff { + struct device *device; + struct hwmon_attr *hwmon_list; + unsigned int n_hwmon; + }; +#endif /* IGB_HWMON */ + +/* board specific private data structure */ +struct igb_adapter { +#ifdef HAVE_VLAN_RX_REGISTER + /* vlgrp must be first member of structure */ + struct vlan_group *vlgrp; +#else + unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; +#endif + struct net_device *netdev; + + unsigned long state; + unsigned int flags; + + unsigned int num_q_vectors; + struct msix_entry *msix_entries; + + + /* TX */ + u16 tx_work_limit; + u32 tx_timeout_count; + int num_tx_queues; + struct igb_ring *tx_ring[IGB_MAX_TX_QUEUES]; + + /* RX */ + int num_rx_queues; + struct igb_ring *rx_ring[IGB_MAX_RX_QUEUES]; + + struct timer_list watchdog_timer; + struct timer_list dma_err_timer; + struct timer_list phy_info_timer; + u16 mng_vlan_id; + u32 bd_number; + u32 wol; + u32 en_mng_pt; + u16 link_speed; + u16 link_duplex; + u8 port_num; + + /* Interrupt Throttle Rate */ + u32 rx_itr_setting; + u32 tx_itr_setting; + + struct work_struct reset_task; + struct work_struct watchdog_task; + struct work_struct dma_err_task; + bool fc_autoneg; + u8 tx_timeout_factor; + +#ifdef DEBUG + bool tx_hang_detected; + bool disable_hw_reset; +#endif + u32 max_frame_size; + + /* OS defined structs */ + struct pci_dev *pdev; +#ifndef HAVE_NETDEV_STATS_IN_NETDEV + struct net_device_stats net_stats; +#endif +#ifndef IGB_NO_LRO + struct igb_lro_stats lro_stats; +#endif + + /* structs defined in e1000_hw.h */ + struct e1000_hw hw; + struct e1000_hw_stats stats; + struct e1000_phy_info phy_info; + struct e1000_phy_stats phy_stats; + +#ifdef ETHTOOL_TEST + u32 test_icr; + struct igb_ring test_tx_ring; + struct igb_ring test_rx_ring; +#endif + + int msg_enable; + + struct igb_q_vector *q_vector[MAX_Q_VECTORS]; + u32 eims_enable_mask; + u32 eims_other; + + /* to not mess up cache alignment, always add to the bottom */ + u32 *config_space; + u16 tx_ring_count; + u16 rx_ring_count; + struct vf_data_storage *vf_data; +#ifdef IFLA_VF_MAX + int vf_rate_link_speed; +#endif + u32 lli_port; + u32 lli_size; + unsigned int vfs_allocated_count; + /* Malicious Driver Detection flag. Valid only when SR-IOV is enabled */ + bool mdd; + int int_mode; + u32 rss_queues; + u32 vmdq_pools; + char fw_version[32]; + u32 wvbr; + struct igb_mac_addr *mac_table; +#ifdef CONFIG_IGB_VMDQ_NETDEV + struct net_device *vmdq_netdev[IGB_MAX_VMDQ_QUEUES]; +#endif + int vferr_refcount; + int dmac; + u32 *shadow_vfta; + + /* External Thermal Sensor support flag */ + bool ets; +#ifdef IGB_HWMON + struct hwmon_buff igb_hwmon_buff; +#else /* IGB_HWMON */ +#ifdef IGB_PROCFS + struct proc_dir_entry *eth_dir; + struct proc_dir_entry *info_dir; + struct proc_dir_entry *therm_dir[E1000_MAX_SENSORS]; + struct igb_therm_proc_data therm_data[E1000_MAX_SENSORS]; + bool old_lsc; +#endif /* IGB_PROCFS */ +#endif /* IGB_HWMON */ + u32 etrack_id; + +#ifdef HAVE_PTP_1588_CLOCK + struct ptp_clock *ptp_clock; + struct ptp_clock_info ptp_caps; + struct delayed_work ptp_overflow_work; + struct work_struct ptp_tx_work; + struct sk_buff *ptp_tx_skb; + unsigned long ptp_tx_start; + unsigned long last_rx_ptp_check; + spinlock_t tmreg_lock; + struct cyclecounter cc; + struct timecounter tc; + u32 tx_hwtstamp_timeouts; + u32 rx_hwtstamp_cleared; +#endif /* HAVE_PTP_1588_CLOCK */ + +#ifdef HAVE_I2C_SUPPORT + struct i2c_algo_bit_data i2c_algo; + struct i2c_adapter i2c_adap; + struct i2c_client *i2c_client; +#endif /* HAVE_I2C_SUPPORT */ + unsigned long link_check_timeout; + + + int devrc; + + int copper_tries; + u16 eee_advert; +}; + +#ifdef CONFIG_IGB_VMDQ_NETDEV +struct igb_vmdq_adapter { +#ifdef HAVE_VLAN_RX_REGISTER + /* vlgrp must be first member of structure */ + struct vlan_group *vlgrp; +#else + unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; +#endif + struct igb_adapter *real_adapter; + struct net_device *vnetdev; + struct net_device_stats net_stats; + struct igb_ring *tx_ring; + struct igb_ring *rx_ring; +}; +#endif + +#define IGB_FLAG_HAS_MSI (1 << 0) +#define IGB_FLAG_DCA_ENABLED (1 << 1) +#define IGB_FLAG_LLI_PUSH (1 << 2) +#define IGB_FLAG_QUAD_PORT_A (1 << 3) +#define IGB_FLAG_QUEUE_PAIRS (1 << 4) +#define IGB_FLAG_EEE (1 << 5) +#define IGB_FLAG_DMAC (1 << 6) +#define IGB_FLAG_DETECT_BAD_DMA (1 << 7) +#define IGB_FLAG_PTP (1 << 8) +#define IGB_FLAG_RSS_FIELD_IPV4_UDP (1 << 9) +#define IGB_FLAG_RSS_FIELD_IPV6_UDP (1 << 10) +#define IGB_FLAG_WOL_SUPPORTED (1 << 11) +#define IGB_FLAG_NEED_LINK_UPDATE (1 << 12) +#define IGB_FLAG_LOOPBACK_ENABLE (1 << 13) +#define IGB_FLAG_MEDIA_RESET (1 << 14) +#define IGB_FLAG_MAS_ENABLE (1 << 15) + +/* Media Auto Sense */ +#define IGB_MAS_ENABLE_0 0X0001 +#define IGB_MAS_ENABLE_1 0X0002 +#define IGB_MAS_ENABLE_2 0X0004 +#define IGB_MAS_ENABLE_3 0X0008 + +#define IGB_MIN_TXPBSIZE 20408 +#define IGB_TX_BUF_4096 4096 + +#define IGB_DMCTLX_DCFLUSH_DIS 0x80000000 /* Disable DMA Coal Flush */ + +/* DMA Coalescing defines */ +#define IGB_DMAC_DISABLE 0 +#define IGB_DMAC_MIN 250 +#define IGB_DMAC_500 500 +#define IGB_DMAC_EN_DEFAULT 1000 +#define IGB_DMAC_2000 2000 +#define IGB_DMAC_3000 3000 +#define IGB_DMAC_4000 4000 +#define IGB_DMAC_5000 5000 +#define IGB_DMAC_6000 6000 +#define IGB_DMAC_7000 7000 +#define IGB_DMAC_8000 8000 +#define IGB_DMAC_9000 9000 +#define IGB_DMAC_MAX 10000 + +#define IGB_82576_TSYNC_SHIFT 19 +#define IGB_82580_TSYNC_SHIFT 24 +#define IGB_TS_HDR_LEN 16 + +/* CEM Support */ +#define FW_HDR_LEN 0x4 +#define FW_CMD_DRV_INFO 0xDD +#define FW_CMD_DRV_INFO_LEN 0x5 +#define FW_CMD_RESERVED 0X0 +#define FW_RESP_SUCCESS 0x1 +#define FW_UNUSED_VER 0x0 +#define FW_MAX_RETRIES 3 +#define FW_STATUS_SUCCESS 0x1 +#define FW_FAMILY_DRV_VER 0Xffffffff + +#define IGB_MAX_LINK_TRIES 20 + +struct e1000_fw_hdr { + u8 cmd; + u8 buf_len; + union + { + u8 cmd_resv; + u8 ret_status; + } cmd_or_resp; + u8 checksum; +}; + +#pragma pack(push,1) +struct e1000_fw_drv_info { + struct e1000_fw_hdr hdr; + u8 port_num; + u32 drv_version; + u16 pad; /* end spacing to ensure length is mult. of dword */ + u8 pad2; /* end spacing to ensure length is mult. of dword2 */ +}; +#pragma pack(pop) + +enum e1000_state_t { + __IGB_TESTING, + __IGB_RESETTING, + __IGB_DOWN +}; + +extern char igb_driver_name[]; +extern char igb_driver_version[]; + +extern int igb_up(struct igb_adapter *); +extern void igb_down(struct igb_adapter *); +extern void igb_reinit_locked(struct igb_adapter *); +extern void igb_reset(struct igb_adapter *); +extern int igb_set_spd_dplx(struct igb_adapter *, u16); +extern int igb_setup_tx_resources(struct igb_ring *); +extern int igb_setup_rx_resources(struct igb_ring *); +extern void igb_free_tx_resources(struct igb_ring *); +extern void igb_free_rx_resources(struct igb_ring *); +extern void igb_configure_tx_ring(struct igb_adapter *, struct igb_ring *); +extern void igb_configure_rx_ring(struct igb_adapter *, struct igb_ring *); +extern void igb_setup_tctl(struct igb_adapter *); +extern void igb_setup_rctl(struct igb_adapter *); +extern netdev_tx_t igb_xmit_frame_ring(struct sk_buff *, struct igb_ring *); +extern void igb_unmap_and_free_tx_resource(struct igb_ring *, + struct igb_tx_buffer *); +extern void igb_alloc_rx_buffers(struct igb_ring *, u16); +extern void igb_clean_rx_ring(struct igb_ring *); +extern void igb_update_stats(struct igb_adapter *); +extern bool igb_has_link(struct igb_adapter *adapter); +extern void igb_set_ethtool_ops(struct net_device *); +extern void igb_check_options(struct igb_adapter *); +extern void igb_power_up_link(struct igb_adapter *); +#ifdef HAVE_PTP_1588_CLOCK +extern void igb_ptp_init(struct igb_adapter *adapter); +extern void igb_ptp_stop(struct igb_adapter *adapter); +extern void igb_ptp_reset(struct igb_adapter *adapter); +extern void igb_ptp_tx_work(struct work_struct *work); +extern void igb_ptp_rx_hang(struct igb_adapter *adapter); +extern void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter); +extern void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector, + struct sk_buff *skb); +extern void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, + unsigned char *va, + struct sk_buff *skb); +static inline void igb_ptp_rx_hwtstamp(struct igb_ring *rx_ring, + union e1000_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) { +#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT + igb_ptp_rx_pktstamp(rx_ring->q_vector, skb->data, skb); + skb_pull(skb, IGB_TS_HDR_LEN); +#endif + return; + } + + if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TS)) + igb_ptp_rx_rgtstamp(rx_ring->q_vector, skb); + + /* Update the last_rx_timestamp timer in order to enable watchdog check + * for error case of latched timestamp on a dropped packet. + */ + rx_ring->last_rx_timestamp = jiffies; +} + +extern int igb_ptp_hwtstamp_ioctl(struct net_device *netdev, + struct ifreq *ifr, int cmd); +#endif /* HAVE_PTP_1588_CLOCK */ +#ifdef ETHTOOL_OPS_COMPAT +extern int ethtool_ioctl(struct ifreq *); +#endif +extern int igb_write_mc_addr_list(struct net_device *netdev); +extern int igb_add_mac_filter(struct igb_adapter *adapter, u8 *addr, u16 queue); +extern int igb_del_mac_filter(struct igb_adapter *adapter, u8* addr, u16 queue); +extern int igb_available_rars(struct igb_adapter *adapter); +extern s32 igb_vlvf_set(struct igb_adapter *, u32, bool, u32); +extern void igb_configure_vt_default_pool(struct igb_adapter *adapter); +extern void igb_enable_vlan_tags(struct igb_adapter *adapter); +#ifndef HAVE_VLAN_RX_REGISTER +extern void igb_vlan_mode(struct net_device *, u32); +#endif + +#define E1000_PCS_CFG_IGN_SD 1 + +#ifdef IGB_HWMON +void igb_sysfs_exit(struct igb_adapter *adapter); +int igb_sysfs_init(struct igb_adapter *adapter); +#else +#ifdef IGB_PROCFS +int igb_procfs_init(struct igb_adapter* adapter); +void igb_procfs_exit(struct igb_adapter* adapter); +int igb_procfs_topdir_init(void); +void igb_procfs_topdir_exit(void); +#endif /* IGB_PROCFS */ +#endif /* IGB_HWMON */ + + + +#endif /* _IGB_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_debugfs.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_debugfs.c new file mode 100644 index 00000000..c07f9f53 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_debugfs.c @@ -0,0 +1,28 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#include "igb.h" diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c new file mode 100644 index 00000000..af7e68a5 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ethtool.c @@ -0,0 +1,2857 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +/* ethtool support for igb */ + +#include +#include + +#ifdef SIOCETHTOOL +#include +#ifdef CONFIG_PM_RUNTIME +#include +#endif /* CONFIG_PM_RUNTIME */ +#include + +#include "igb.h" +#include "igb_regtest.h" +#include +#ifdef ETHTOOL_GEEE +#include +#endif + +#ifdef ETHTOOL_OPS_COMPAT +#include "kcompat_ethtool.c" +#endif +#ifdef ETHTOOL_GSTATS +struct igb_stats { + char stat_string[ETH_GSTRING_LEN]; + int sizeof_stat; + int stat_offset; +}; + +#define IGB_STAT(_name, _stat) { \ + .stat_string = _name, \ + .sizeof_stat = FIELD_SIZEOF(struct igb_adapter, _stat), \ + .stat_offset = offsetof(struct igb_adapter, _stat) \ +} +static const struct igb_stats igb_gstrings_stats[] = { + IGB_STAT("rx_packets", stats.gprc), + IGB_STAT("tx_packets", stats.gptc), + IGB_STAT("rx_bytes", stats.gorc), + IGB_STAT("tx_bytes", stats.gotc), + IGB_STAT("rx_broadcast", stats.bprc), + IGB_STAT("tx_broadcast", stats.bptc), + IGB_STAT("rx_multicast", stats.mprc), + IGB_STAT("tx_multicast", stats.mptc), + IGB_STAT("multicast", stats.mprc), + IGB_STAT("collisions", stats.colc), + IGB_STAT("rx_crc_errors", stats.crcerrs), + IGB_STAT("rx_no_buffer_count", stats.rnbc), + IGB_STAT("rx_missed_errors", stats.mpc), + IGB_STAT("tx_aborted_errors", stats.ecol), + IGB_STAT("tx_carrier_errors", stats.tncrs), + IGB_STAT("tx_window_errors", stats.latecol), + IGB_STAT("tx_abort_late_coll", stats.latecol), + IGB_STAT("tx_deferred_ok", stats.dc), + IGB_STAT("tx_single_coll_ok", stats.scc), + IGB_STAT("tx_multi_coll_ok", stats.mcc), + IGB_STAT("tx_timeout_count", tx_timeout_count), + IGB_STAT("rx_long_length_errors", stats.roc), + IGB_STAT("rx_short_length_errors", stats.ruc), + IGB_STAT("rx_align_errors", stats.algnerrc), + IGB_STAT("tx_tcp_seg_good", stats.tsctc), + IGB_STAT("tx_tcp_seg_failed", stats.tsctfc), + IGB_STAT("rx_flow_control_xon", stats.xonrxc), + IGB_STAT("rx_flow_control_xoff", stats.xoffrxc), + IGB_STAT("tx_flow_control_xon", stats.xontxc), + IGB_STAT("tx_flow_control_xoff", stats.xofftxc), + IGB_STAT("rx_long_byte_count", stats.gorc), + IGB_STAT("tx_dma_out_of_sync", stats.doosync), +#ifndef IGB_NO_LRO + IGB_STAT("lro_aggregated", lro_stats.coal), + IGB_STAT("lro_flushed", lro_stats.flushed), +#endif /* IGB_LRO */ + IGB_STAT("tx_smbus", stats.mgptc), + IGB_STAT("rx_smbus", stats.mgprc), + IGB_STAT("dropped_smbus", stats.mgpdc), + IGB_STAT("os2bmc_rx_by_bmc", stats.o2bgptc), + IGB_STAT("os2bmc_tx_by_bmc", stats.b2ospc), + IGB_STAT("os2bmc_tx_by_host", stats.o2bspc), + IGB_STAT("os2bmc_rx_by_host", stats.b2ogprc), +#ifdef HAVE_PTP_1588_CLOCK + IGB_STAT("tx_hwtstamp_timeouts", tx_hwtstamp_timeouts), + IGB_STAT("rx_hwtstamp_cleared", rx_hwtstamp_cleared), +#endif /* HAVE_PTP_1588_CLOCK */ +}; + +#define IGB_NETDEV_STAT(_net_stat) { \ + .stat_string = #_net_stat, \ + .sizeof_stat = FIELD_SIZEOF(struct net_device_stats, _net_stat), \ + .stat_offset = offsetof(struct net_device_stats, _net_stat) \ +} +static const struct igb_stats igb_gstrings_net_stats[] = { + IGB_NETDEV_STAT(rx_errors), + IGB_NETDEV_STAT(tx_errors), + IGB_NETDEV_STAT(tx_dropped), + IGB_NETDEV_STAT(rx_length_errors), + IGB_NETDEV_STAT(rx_over_errors), + IGB_NETDEV_STAT(rx_frame_errors), + IGB_NETDEV_STAT(rx_fifo_errors), + IGB_NETDEV_STAT(tx_fifo_errors), + IGB_NETDEV_STAT(tx_heartbeat_errors) +}; + +#define IGB_GLOBAL_STATS_LEN ARRAY_SIZE(igb_gstrings_stats) +#define IGB_NETDEV_STATS_LEN ARRAY_SIZE(igb_gstrings_net_stats) +#define IGB_RX_QUEUE_STATS_LEN \ + (sizeof(struct igb_rx_queue_stats) / sizeof(u64)) +#define IGB_TX_QUEUE_STATS_LEN \ + (sizeof(struct igb_tx_queue_stats) / sizeof(u64)) +#define IGB_QUEUE_STATS_LEN \ + ((((struct igb_adapter *)netdev_priv(netdev))->num_rx_queues * \ + IGB_RX_QUEUE_STATS_LEN) + \ + (((struct igb_adapter *)netdev_priv(netdev))->num_tx_queues * \ + IGB_TX_QUEUE_STATS_LEN)) +#define IGB_STATS_LEN \ + (IGB_GLOBAL_STATS_LEN + IGB_NETDEV_STATS_LEN + IGB_QUEUE_STATS_LEN) + +#endif /* ETHTOOL_GSTATS */ +#ifdef ETHTOOL_TEST +static const char igb_gstrings_test[][ETH_GSTRING_LEN] = { + "Register test (offline)", "Eeprom test (offline)", + "Interrupt test (offline)", "Loopback test (offline)", + "Link test (on/offline)" +}; +#define IGB_TEST_LEN (sizeof(igb_gstrings_test) / ETH_GSTRING_LEN) +#endif /* ETHTOOL_TEST */ + +static int igb_get_settings(struct net_device *netdev, struct ethtool_cmd *ecmd) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + u32 status; + + if (hw->phy.media_type == e1000_media_type_copper) { + + ecmd->supported = (SUPPORTED_10baseT_Half | + SUPPORTED_10baseT_Full | + SUPPORTED_100baseT_Half | + SUPPORTED_100baseT_Full | + SUPPORTED_1000baseT_Full| + SUPPORTED_Autoneg | + SUPPORTED_TP | + SUPPORTED_Pause); + ecmd->advertising = ADVERTISED_TP; + + if (hw->mac.autoneg == 1) { + ecmd->advertising |= ADVERTISED_Autoneg; + /* the e1000 autoneg seems to match ethtool nicely */ + ecmd->advertising |= hw->phy.autoneg_advertised; + } + + ecmd->port = PORT_TP; + ecmd->phy_address = hw->phy.addr; + ecmd->transceiver = XCVR_INTERNAL; + + } else { + ecmd->supported = (SUPPORTED_1000baseT_Full | + SUPPORTED_100baseT_Full | + SUPPORTED_FIBRE | + SUPPORTED_Autoneg | + SUPPORTED_Pause); + if (hw->mac.type == e1000_i354) + ecmd->supported |= (SUPPORTED_2500baseX_Full); + + ecmd->advertising = ADVERTISED_FIBRE; + + switch (adapter->link_speed) { + case SPEED_2500: + ecmd->advertising = ADVERTISED_2500baseX_Full; + break; + case SPEED_1000: + ecmd->advertising = ADVERTISED_1000baseT_Full; + break; + case SPEED_100: + ecmd->advertising = ADVERTISED_100baseT_Full; + break; + default: + break; + } + + if (hw->mac.autoneg == 1) + ecmd->advertising |= ADVERTISED_Autoneg; + + ecmd->port = PORT_FIBRE; + ecmd->transceiver = XCVR_EXTERNAL; + } + + if (hw->mac.autoneg != 1) + ecmd->advertising &= ~(ADVERTISED_Pause | + ADVERTISED_Asym_Pause); + + if (hw->fc.requested_mode == e1000_fc_full) + ecmd->advertising |= ADVERTISED_Pause; + else if (hw->fc.requested_mode == e1000_fc_rx_pause) + ecmd->advertising |= (ADVERTISED_Pause | + ADVERTISED_Asym_Pause); + else if (hw->fc.requested_mode == e1000_fc_tx_pause) + ecmd->advertising |= ADVERTISED_Asym_Pause; + else + ecmd->advertising &= ~(ADVERTISED_Pause | + ADVERTISED_Asym_Pause); + + status = E1000_READ_REG(hw, E1000_STATUS); + + if (status & E1000_STATUS_LU) { + if ((hw->mac.type == e1000_i354) && + (status & E1000_STATUS_2P5_SKU) && + !(status & E1000_STATUS_2P5_SKU_OVER)) + ecmd->speed = SPEED_2500; + else if (status & E1000_STATUS_SPEED_1000) + ecmd->speed = SPEED_1000; + else if (status & E1000_STATUS_SPEED_100) + ecmd->speed = SPEED_100; + else + ecmd->speed = SPEED_10; + + if ((status & E1000_STATUS_FD) || + hw->phy.media_type != e1000_media_type_copper) + ecmd->duplex = DUPLEX_FULL; + else + ecmd->duplex = DUPLEX_HALF; + + } else { + ecmd->speed = -1; + ecmd->duplex = -1; + } + + if ((hw->phy.media_type == e1000_media_type_fiber) || + hw->mac.autoneg) + ecmd->autoneg = AUTONEG_ENABLE; + else + ecmd->autoneg = AUTONEG_DISABLE; +#ifdef ETH_TP_MDI_X + + /* MDI-X => 2; MDI =>1; Invalid =>0 */ + if (hw->phy.media_type == e1000_media_type_copper) + ecmd->eth_tp_mdix = hw->phy.is_mdix ? ETH_TP_MDI_X : + ETH_TP_MDI; + else + ecmd->eth_tp_mdix = ETH_TP_MDI_INVALID; + +#ifdef ETH_TP_MDI_AUTO + if (hw->phy.mdix == AUTO_ALL_MODES) + ecmd->eth_tp_mdix_ctrl = ETH_TP_MDI_AUTO; + else + ecmd->eth_tp_mdix_ctrl = hw->phy.mdix; + +#endif +#endif /* ETH_TP_MDI_X */ + return 0; +} + +static int igb_set_settings(struct net_device *netdev, struct ethtool_cmd *ecmd) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + + if (ecmd->duplex == DUPLEX_HALF) { + if (!hw->dev_spec._82575.eee_disable) + dev_info(pci_dev_to_dev(adapter->pdev), "EEE disabled: not supported with half duplex\n"); + hw->dev_spec._82575.eee_disable = true; + } else { + if (hw->dev_spec._82575.eee_disable) + dev_info(pci_dev_to_dev(adapter->pdev), "EEE enabled\n"); + hw->dev_spec._82575.eee_disable = false; + } + + /* When SoL/IDER sessions are active, autoneg/speed/duplex + * cannot be changed */ + if (e1000_check_reset_block(hw)) { + dev_err(pci_dev_to_dev(adapter->pdev), "Cannot change link " + "characteristics when SoL/IDER is active.\n"); + return -EINVAL; + } + +#ifdef ETH_TP_MDI_AUTO + /* + * MDI setting is only allowed when autoneg enabled because + * some hardware doesn't allow MDI setting when speed or + * duplex is forced. + */ + if (ecmd->eth_tp_mdix_ctrl) { + if (hw->phy.media_type != e1000_media_type_copper) + return -EOPNOTSUPP; + + if ((ecmd->eth_tp_mdix_ctrl != ETH_TP_MDI_AUTO) && + (ecmd->autoneg != AUTONEG_ENABLE)) { + dev_err(&adapter->pdev->dev, "forcing MDI/MDI-X state is not supported when link speed and/or duplex are forced\n"); + return -EINVAL; + } + } + +#endif /* ETH_TP_MDI_AUTO */ + while (test_and_set_bit(__IGB_RESETTING, &adapter->state)) + usleep_range(1000, 2000); + + if (ecmd->autoneg == AUTONEG_ENABLE) { + hw->mac.autoneg = 1; + if (hw->phy.media_type == e1000_media_type_fiber) { + hw->phy.autoneg_advertised = ecmd->advertising | + ADVERTISED_FIBRE | + ADVERTISED_Autoneg; + switch (adapter->link_speed) { + case SPEED_2500: + hw->phy.autoneg_advertised = + ADVERTISED_2500baseX_Full; + break; + case SPEED_1000: + hw->phy.autoneg_advertised = + ADVERTISED_1000baseT_Full; + break; + case SPEED_100: + hw->phy.autoneg_advertised = + ADVERTISED_100baseT_Full; + break; + default: + break; + } + } else { + hw->phy.autoneg_advertised = ecmd->advertising | + ADVERTISED_TP | + ADVERTISED_Autoneg; + } + ecmd->advertising = hw->phy.autoneg_advertised; + if (adapter->fc_autoneg) + hw->fc.requested_mode = e1000_fc_default; + } else { + if (igb_set_spd_dplx(adapter, ecmd->speed + ecmd->duplex)) { + clear_bit(__IGB_RESETTING, &adapter->state); + return -EINVAL; + } + } + +#ifdef ETH_TP_MDI_AUTO + /* MDI-X => 2; MDI => 1; Auto => 3 */ + if (ecmd->eth_tp_mdix_ctrl) { + /* fix up the value for auto (3 => 0) as zero is mapped + * internally to auto + */ + if (ecmd->eth_tp_mdix_ctrl == ETH_TP_MDI_AUTO) + hw->phy.mdix = AUTO_ALL_MODES; + else + hw->phy.mdix = ecmd->eth_tp_mdix_ctrl; + } + +#endif /* ETH_TP_MDI_AUTO */ + /* reset the link */ + if (netif_running(adapter->netdev)) { + igb_down(adapter); + igb_up(adapter); + } else + igb_reset(adapter); + + clear_bit(__IGB_RESETTING, &adapter->state); + return 0; +} + +static u32 igb_get_link(struct net_device *netdev) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_mac_info *mac = &adapter->hw.mac; + + /* + * If the link is not reported up to netdev, interrupts are disabled, + * and so the physical link state may have changed since we last + * looked. Set get_link_status to make sure that the true link + * state is interrogated, rather than pulling a cached and possibly + * stale link state from the driver. + */ + if (!netif_carrier_ok(netdev)) + mac->get_link_status = 1; + + return igb_has_link(adapter); +} + +static void igb_get_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pause) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + + pause->autoneg = + (adapter->fc_autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE); + + if (hw->fc.current_mode == e1000_fc_rx_pause) + pause->rx_pause = 1; + else if (hw->fc.current_mode == e1000_fc_tx_pause) + pause->tx_pause = 1; + else if (hw->fc.current_mode == e1000_fc_full) { + pause->rx_pause = 1; + pause->tx_pause = 1; + } +} + +static int igb_set_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pause) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + int retval = 0; + + adapter->fc_autoneg = pause->autoneg; + + while (test_and_set_bit(__IGB_RESETTING, &adapter->state)) + usleep_range(1000, 2000); + + if (adapter->fc_autoneg == AUTONEG_ENABLE) { + hw->fc.requested_mode = e1000_fc_default; + if (netif_running(adapter->netdev)) { + igb_down(adapter); + igb_up(adapter); + } else { + igb_reset(adapter); + } + } else { + if (pause->rx_pause && pause->tx_pause) + hw->fc.requested_mode = e1000_fc_full; + else if (pause->rx_pause && !pause->tx_pause) + hw->fc.requested_mode = e1000_fc_rx_pause; + else if (!pause->rx_pause && pause->tx_pause) + hw->fc.requested_mode = e1000_fc_tx_pause; + else if (!pause->rx_pause && !pause->tx_pause) + hw->fc.requested_mode = e1000_fc_none; + + hw->fc.current_mode = hw->fc.requested_mode; + + if (hw->phy.media_type == e1000_media_type_fiber) { + retval = hw->mac.ops.setup_link(hw); + /* implicit goto out */ + } else { + retval = e1000_force_mac_fc(hw); + if (retval) + goto out; + e1000_set_fc_watermarks_generic(hw); + } + } + +out: + clear_bit(__IGB_RESETTING, &adapter->state); + return retval; +} + +static u32 igb_get_msglevel(struct net_device *netdev) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + return adapter->msg_enable; +} + +static void igb_set_msglevel(struct net_device *netdev, u32 data) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + adapter->msg_enable = data; +} + +static int igb_get_regs_len(struct net_device *netdev) +{ +#define IGB_REGS_LEN 555 + return IGB_REGS_LEN * sizeof(u32); +} + +static void igb_get_regs(struct net_device *netdev, + struct ethtool_regs *regs, void *p) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + u32 *regs_buff = p; + u8 i; + + memset(p, 0, IGB_REGS_LEN * sizeof(u32)); + + regs->version = (1 << 24) | (hw->revision_id << 16) | hw->device_id; + + /* General Registers */ + regs_buff[0] = E1000_READ_REG(hw, E1000_CTRL); + regs_buff[1] = E1000_READ_REG(hw, E1000_STATUS); + regs_buff[2] = E1000_READ_REG(hw, E1000_CTRL_EXT); + regs_buff[3] = E1000_READ_REG(hw, E1000_MDIC); + regs_buff[4] = E1000_READ_REG(hw, E1000_SCTL); + regs_buff[5] = E1000_READ_REG(hw, E1000_CONNSW); + regs_buff[6] = E1000_READ_REG(hw, E1000_VET); + regs_buff[7] = E1000_READ_REG(hw, E1000_LEDCTL); + regs_buff[8] = E1000_READ_REG(hw, E1000_PBA); + regs_buff[9] = E1000_READ_REG(hw, E1000_PBS); + regs_buff[10] = E1000_READ_REG(hw, E1000_FRTIMER); + regs_buff[11] = E1000_READ_REG(hw, E1000_TCPTIMER); + + /* NVM Register */ + regs_buff[12] = E1000_READ_REG(hw, E1000_EECD); + + /* Interrupt */ + /* Reading EICS for EICR because they read the + * same but EICS does not clear on read */ + regs_buff[13] = E1000_READ_REG(hw, E1000_EICS); + regs_buff[14] = E1000_READ_REG(hw, E1000_EICS); + regs_buff[15] = E1000_READ_REG(hw, E1000_EIMS); + regs_buff[16] = E1000_READ_REG(hw, E1000_EIMC); + regs_buff[17] = E1000_READ_REG(hw, E1000_EIAC); + regs_buff[18] = E1000_READ_REG(hw, E1000_EIAM); + /* Reading ICS for ICR because they read the + * same but ICS does not clear on read */ + regs_buff[19] = E1000_READ_REG(hw, E1000_ICS); + regs_buff[20] = E1000_READ_REG(hw, E1000_ICS); + regs_buff[21] = E1000_READ_REG(hw, E1000_IMS); + regs_buff[22] = E1000_READ_REG(hw, E1000_IMC); + regs_buff[23] = E1000_READ_REG(hw, E1000_IAC); + regs_buff[24] = E1000_READ_REG(hw, E1000_IAM); + regs_buff[25] = E1000_READ_REG(hw, E1000_IMIRVP); + + /* Flow Control */ + regs_buff[26] = E1000_READ_REG(hw, E1000_FCAL); + regs_buff[27] = E1000_READ_REG(hw, E1000_FCAH); + regs_buff[28] = E1000_READ_REG(hw, E1000_FCTTV); + regs_buff[29] = E1000_READ_REG(hw, E1000_FCRTL); + regs_buff[30] = E1000_READ_REG(hw, E1000_FCRTH); + regs_buff[31] = E1000_READ_REG(hw, E1000_FCRTV); + + /* Receive */ + regs_buff[32] = E1000_READ_REG(hw, E1000_RCTL); + regs_buff[33] = E1000_READ_REG(hw, E1000_RXCSUM); + regs_buff[34] = E1000_READ_REG(hw, E1000_RLPML); + regs_buff[35] = E1000_READ_REG(hw, E1000_RFCTL); + regs_buff[36] = E1000_READ_REG(hw, E1000_MRQC); + regs_buff[37] = E1000_READ_REG(hw, E1000_VT_CTL); + + /* Transmit */ + regs_buff[38] = E1000_READ_REG(hw, E1000_TCTL); + regs_buff[39] = E1000_READ_REG(hw, E1000_TCTL_EXT); + regs_buff[40] = E1000_READ_REG(hw, E1000_TIPG); + regs_buff[41] = E1000_READ_REG(hw, E1000_DTXCTL); + + /* Wake Up */ + regs_buff[42] = E1000_READ_REG(hw, E1000_WUC); + regs_buff[43] = E1000_READ_REG(hw, E1000_WUFC); + regs_buff[44] = E1000_READ_REG(hw, E1000_WUS); + regs_buff[45] = E1000_READ_REG(hw, E1000_IPAV); + regs_buff[46] = E1000_READ_REG(hw, E1000_WUPL); + + /* MAC */ + regs_buff[47] = E1000_READ_REG(hw, E1000_PCS_CFG0); + regs_buff[48] = E1000_READ_REG(hw, E1000_PCS_LCTL); + regs_buff[49] = E1000_READ_REG(hw, E1000_PCS_LSTAT); + regs_buff[50] = E1000_READ_REG(hw, E1000_PCS_ANADV); + regs_buff[51] = E1000_READ_REG(hw, E1000_PCS_LPAB); + regs_buff[52] = E1000_READ_REG(hw, E1000_PCS_NPTX); + regs_buff[53] = E1000_READ_REG(hw, E1000_PCS_LPABNP); + + /* Statistics */ + regs_buff[54] = adapter->stats.crcerrs; + regs_buff[55] = adapter->stats.algnerrc; + regs_buff[56] = adapter->stats.symerrs; + regs_buff[57] = adapter->stats.rxerrc; + regs_buff[58] = adapter->stats.mpc; + regs_buff[59] = adapter->stats.scc; + regs_buff[60] = adapter->stats.ecol; + regs_buff[61] = adapter->stats.mcc; + regs_buff[62] = adapter->stats.latecol; + regs_buff[63] = adapter->stats.colc; + regs_buff[64] = adapter->stats.dc; + regs_buff[65] = adapter->stats.tncrs; + regs_buff[66] = adapter->stats.sec; + regs_buff[67] = adapter->stats.htdpmc; + regs_buff[68] = adapter->stats.rlec; + regs_buff[69] = adapter->stats.xonrxc; + regs_buff[70] = adapter->stats.xontxc; + regs_buff[71] = adapter->stats.xoffrxc; + regs_buff[72] = adapter->stats.xofftxc; + regs_buff[73] = adapter->stats.fcruc; + regs_buff[74] = adapter->stats.prc64; + regs_buff[75] = adapter->stats.prc127; + regs_buff[76] = adapter->stats.prc255; + regs_buff[77] = adapter->stats.prc511; + regs_buff[78] = adapter->stats.prc1023; + regs_buff[79] = adapter->stats.prc1522; + regs_buff[80] = adapter->stats.gprc; + regs_buff[81] = adapter->stats.bprc; + regs_buff[82] = adapter->stats.mprc; + regs_buff[83] = adapter->stats.gptc; + regs_buff[84] = adapter->stats.gorc; + regs_buff[86] = adapter->stats.gotc; + regs_buff[88] = adapter->stats.rnbc; + regs_buff[89] = adapter->stats.ruc; + regs_buff[90] = adapter->stats.rfc; + regs_buff[91] = adapter->stats.roc; + regs_buff[92] = adapter->stats.rjc; + regs_buff[93] = adapter->stats.mgprc; + regs_buff[94] = adapter->stats.mgpdc; + regs_buff[95] = adapter->stats.mgptc; + regs_buff[96] = adapter->stats.tor; + regs_buff[98] = adapter->stats.tot; + regs_buff[100] = adapter->stats.tpr; + regs_buff[101] = adapter->stats.tpt; + regs_buff[102] = adapter->stats.ptc64; + regs_buff[103] = adapter->stats.ptc127; + regs_buff[104] = adapter->stats.ptc255; + regs_buff[105] = adapter->stats.ptc511; + regs_buff[106] = adapter->stats.ptc1023; + regs_buff[107] = adapter->stats.ptc1522; + regs_buff[108] = adapter->stats.mptc; + regs_buff[109] = adapter->stats.bptc; + regs_buff[110] = adapter->stats.tsctc; + regs_buff[111] = adapter->stats.iac; + regs_buff[112] = adapter->stats.rpthc; + regs_buff[113] = adapter->stats.hgptc; + regs_buff[114] = adapter->stats.hgorc; + regs_buff[116] = adapter->stats.hgotc; + regs_buff[118] = adapter->stats.lenerrs; + regs_buff[119] = adapter->stats.scvpc; + regs_buff[120] = adapter->stats.hrmpc; + + for (i = 0; i < 4; i++) + regs_buff[121 + i] = E1000_READ_REG(hw, E1000_SRRCTL(i)); + for (i = 0; i < 4; i++) + regs_buff[125 + i] = E1000_READ_REG(hw, E1000_PSRTYPE(i)); + for (i = 0; i < 4; i++) + regs_buff[129 + i] = E1000_READ_REG(hw, E1000_RDBAL(i)); + for (i = 0; i < 4; i++) + regs_buff[133 + i] = E1000_READ_REG(hw, E1000_RDBAH(i)); + for (i = 0; i < 4; i++) + regs_buff[137 + i] = E1000_READ_REG(hw, E1000_RDLEN(i)); + for (i = 0; i < 4; i++) + regs_buff[141 + i] = E1000_READ_REG(hw, E1000_RDH(i)); + for (i = 0; i < 4; i++) + regs_buff[145 + i] = E1000_READ_REG(hw, E1000_RDT(i)); + for (i = 0; i < 4; i++) + regs_buff[149 + i] = E1000_READ_REG(hw, E1000_RXDCTL(i)); + + for (i = 0; i < 10; i++) + regs_buff[153 + i] = E1000_READ_REG(hw, E1000_EITR(i)); + for (i = 0; i < 8; i++) + regs_buff[163 + i] = E1000_READ_REG(hw, E1000_IMIR(i)); + for (i = 0; i < 8; i++) + regs_buff[171 + i] = E1000_READ_REG(hw, E1000_IMIREXT(i)); + for (i = 0; i < 16; i++) + regs_buff[179 + i] = E1000_READ_REG(hw, E1000_RAL(i)); + for (i = 0; i < 16; i++) + regs_buff[195 + i] = E1000_READ_REG(hw, E1000_RAH(i)); + + for (i = 0; i < 4; i++) + regs_buff[211 + i] = E1000_READ_REG(hw, E1000_TDBAL(i)); + for (i = 0; i < 4; i++) + regs_buff[215 + i] = E1000_READ_REG(hw, E1000_TDBAH(i)); + for (i = 0; i < 4; i++) + regs_buff[219 + i] = E1000_READ_REG(hw, E1000_TDLEN(i)); + for (i = 0; i < 4; i++) + regs_buff[223 + i] = E1000_READ_REG(hw, E1000_TDH(i)); + for (i = 0; i < 4; i++) + regs_buff[227 + i] = E1000_READ_REG(hw, E1000_TDT(i)); + for (i = 0; i < 4; i++) + regs_buff[231 + i] = E1000_READ_REG(hw, E1000_TXDCTL(i)); + for (i = 0; i < 4; i++) + regs_buff[235 + i] = E1000_READ_REG(hw, E1000_TDWBAL(i)); + for (i = 0; i < 4; i++) + regs_buff[239 + i] = E1000_READ_REG(hw, E1000_TDWBAH(i)); + for (i = 0; i < 4; i++) + regs_buff[243 + i] = E1000_READ_REG(hw, E1000_DCA_TXCTRL(i)); + + for (i = 0; i < 4; i++) + regs_buff[247 + i] = E1000_READ_REG(hw, E1000_IP4AT_REG(i)); + for (i = 0; i < 4; i++) + regs_buff[251 + i] = E1000_READ_REG(hw, E1000_IP6AT_REG(i)); + for (i = 0; i < 32; i++) + regs_buff[255 + i] = E1000_READ_REG(hw, E1000_WUPM_REG(i)); + for (i = 0; i < 128; i++) + regs_buff[287 + i] = E1000_READ_REG(hw, E1000_FFMT_REG(i)); + for (i = 0; i < 128; i++) + regs_buff[415 + i] = E1000_READ_REG(hw, E1000_FFVT_REG(i)); + for (i = 0; i < 4; i++) + regs_buff[543 + i] = E1000_READ_REG(hw, E1000_FFLT_REG(i)); + + regs_buff[547] = E1000_READ_REG(hw, E1000_TDFH); + regs_buff[548] = E1000_READ_REG(hw, E1000_TDFT); + regs_buff[549] = E1000_READ_REG(hw, E1000_TDFHS); + regs_buff[550] = E1000_READ_REG(hw, E1000_TDFPC); + if (hw->mac.type > e1000_82580) { + regs_buff[551] = adapter->stats.o2bgptc; + regs_buff[552] = adapter->stats.b2ospc; + regs_buff[553] = adapter->stats.o2bspc; + regs_buff[554] = adapter->stats.b2ogprc; + } +} + +static int igb_get_eeprom_len(struct net_device *netdev) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + return adapter->hw.nvm.word_size * 2; +} + +static int igb_get_eeprom(struct net_device *netdev, + struct ethtool_eeprom *eeprom, u8 *bytes) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + u16 *eeprom_buff; + int first_word, last_word; + int ret_val = 0; + u16 i; + + if (eeprom->len == 0) + return -EINVAL; + + eeprom->magic = hw->vendor_id | (hw->device_id << 16); + + first_word = eeprom->offset >> 1; + last_word = (eeprom->offset + eeprom->len - 1) >> 1; + + eeprom_buff = kmalloc(sizeof(u16) * + (last_word - first_word + 1), GFP_KERNEL); + if (!eeprom_buff) + return -ENOMEM; + + if (hw->nvm.type == e1000_nvm_eeprom_spi) + ret_val = e1000_read_nvm(hw, first_word, + last_word - first_word + 1, + eeprom_buff); + else { + for (i = 0; i < last_word - first_word + 1; i++) { + ret_val = e1000_read_nvm(hw, first_word + i, 1, + &eeprom_buff[i]); + if (ret_val) + break; + } + } + + /* Device's eeprom is always little-endian, word addressable */ + for (i = 0; i < last_word - first_word + 1; i++) + eeprom_buff[i] = le16_to_cpu(eeprom_buff[i]); + + memcpy(bytes, (u8 *)eeprom_buff + (eeprom->offset & 1), + eeprom->len); + kfree(eeprom_buff); + + return ret_val; +} + +static int igb_set_eeprom(struct net_device *netdev, + struct ethtool_eeprom *eeprom, u8 *bytes) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + u16 *eeprom_buff; + void *ptr; + int max_len, first_word, last_word, ret_val = 0; + u16 i; + + if (eeprom->len == 0) + return -EOPNOTSUPP; + + if (eeprom->magic != (hw->vendor_id | (hw->device_id << 16))) + return -EFAULT; + + max_len = hw->nvm.word_size * 2; + + first_word = eeprom->offset >> 1; + last_word = (eeprom->offset + eeprom->len - 1) >> 1; + eeprom_buff = kmalloc(max_len, GFP_KERNEL); + if (!eeprom_buff) + return -ENOMEM; + + ptr = (void *)eeprom_buff; + + if (eeprom->offset & 1) { + /* need read/modify/write of first changed EEPROM word */ + /* only the second byte of the word is being modified */ + ret_val = e1000_read_nvm(hw, first_word, 1, + &eeprom_buff[0]); + ptr++; + } + if (((eeprom->offset + eeprom->len) & 1) && (ret_val == 0)) { + /* need read/modify/write of last changed EEPROM word */ + /* only the first byte of the word is being modified */ + ret_val = e1000_read_nvm(hw, last_word, 1, + &eeprom_buff[last_word - first_word]); + } + + /* Device's eeprom is always little-endian, word addressable */ + for (i = 0; i < last_word - first_word + 1; i++) + le16_to_cpus(&eeprom_buff[i]); + + memcpy(ptr, bytes, eeprom->len); + + for (i = 0; i < last_word - first_word + 1; i++) + cpu_to_le16s(&eeprom_buff[i]); + + ret_val = e1000_write_nvm(hw, first_word, + last_word - first_word + 1, eeprom_buff); + + /* Update the checksum if write succeeded. + * and flush shadow RAM for 82573 controllers */ + if (ret_val == 0) + e1000_update_nvm_checksum(hw); + + kfree(eeprom_buff); + return ret_val; +} + +static void igb_get_drvinfo(struct net_device *netdev, + struct ethtool_drvinfo *drvinfo) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + + strncpy(drvinfo->driver, igb_driver_name, sizeof(drvinfo->driver) - 1); + strncpy(drvinfo->version, igb_driver_version, sizeof(drvinfo->version) - 1); + + strncpy(drvinfo->fw_version, adapter->fw_version, + sizeof(drvinfo->fw_version) - 1); + strncpy(drvinfo->bus_info, pci_name(adapter->pdev), sizeof(drvinfo->bus_info) -1); + drvinfo->n_stats = IGB_STATS_LEN; + drvinfo->testinfo_len = IGB_TEST_LEN; + drvinfo->regdump_len = igb_get_regs_len(netdev); + drvinfo->eedump_len = igb_get_eeprom_len(netdev); +} + +static void igb_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + + ring->rx_max_pending = IGB_MAX_RXD; + ring->tx_max_pending = IGB_MAX_TXD; + ring->rx_mini_max_pending = 0; + ring->rx_jumbo_max_pending = 0; + ring->rx_pending = adapter->rx_ring_count; + ring->tx_pending = adapter->tx_ring_count; + ring->rx_mini_pending = 0; + ring->rx_jumbo_pending = 0; +} + +static int igb_set_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct igb_ring *temp_ring; + int i, err = 0; + u16 new_rx_count, new_tx_count; + + if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending)) + return -EINVAL; + + new_rx_count = min(ring->rx_pending, (u32)IGB_MAX_RXD); + new_rx_count = max(new_rx_count, (u16)IGB_MIN_RXD); + new_rx_count = ALIGN(new_rx_count, REQ_RX_DESCRIPTOR_MULTIPLE); + + new_tx_count = min(ring->tx_pending, (u32)IGB_MAX_TXD); + new_tx_count = max(new_tx_count, (u16)IGB_MIN_TXD); + new_tx_count = ALIGN(new_tx_count, REQ_TX_DESCRIPTOR_MULTIPLE); + + if ((new_tx_count == adapter->tx_ring_count) && + (new_rx_count == adapter->rx_ring_count)) { + /* nothing to do */ + return 0; + } + + while (test_and_set_bit(__IGB_RESETTING, &adapter->state)) + usleep_range(1000, 2000); + + if (!netif_running(adapter->netdev)) { + for (i = 0; i < adapter->num_tx_queues; i++) + adapter->tx_ring[i]->count = new_tx_count; + for (i = 0; i < adapter->num_rx_queues; i++) + adapter->rx_ring[i]->count = new_rx_count; + adapter->tx_ring_count = new_tx_count; + adapter->rx_ring_count = new_rx_count; + goto clear_reset; + } + + if (adapter->num_tx_queues > adapter->num_rx_queues) + temp_ring = vmalloc(adapter->num_tx_queues * sizeof(struct igb_ring)); + else + temp_ring = vmalloc(adapter->num_rx_queues * sizeof(struct igb_ring)); + + if (!temp_ring) { + err = -ENOMEM; + goto clear_reset; + } + + igb_down(adapter); + + /* + * We can't just free everything and then setup again, + * because the ISRs in MSI-X mode get passed pointers + * to the tx and rx ring structs. + */ + if (new_tx_count != adapter->tx_ring_count) { + for (i = 0; i < adapter->num_tx_queues; i++) { + memcpy(&temp_ring[i], adapter->tx_ring[i], + sizeof(struct igb_ring)); + + temp_ring[i].count = new_tx_count; + err = igb_setup_tx_resources(&temp_ring[i]); + if (err) { + while (i) { + i--; + igb_free_tx_resources(&temp_ring[i]); + } + goto err_setup; + } + } + + for (i = 0; i < adapter->num_tx_queues; i++) { + igb_free_tx_resources(adapter->tx_ring[i]); + + memcpy(adapter->tx_ring[i], &temp_ring[i], + sizeof(struct igb_ring)); + } + + adapter->tx_ring_count = new_tx_count; + } + + if (new_rx_count != adapter->rx_ring_count) { + for (i = 0; i < adapter->num_rx_queues; i++) { + memcpy(&temp_ring[i], adapter->rx_ring[i], + sizeof(struct igb_ring)); + + temp_ring[i].count = new_rx_count; + err = igb_setup_rx_resources(&temp_ring[i]); + if (err) { + while (i) { + i--; + igb_free_rx_resources(&temp_ring[i]); + } + goto err_setup; + } + + } + + for (i = 0; i < adapter->num_rx_queues; i++) { + igb_free_rx_resources(adapter->rx_ring[i]); + + memcpy(adapter->rx_ring[i], &temp_ring[i], + sizeof(struct igb_ring)); + } + + adapter->rx_ring_count = new_rx_count; + } +err_setup: + igb_up(adapter); + vfree(temp_ring); +clear_reset: + clear_bit(__IGB_RESETTING, &adapter->state); + return err; +} +static bool reg_pattern_test(struct igb_adapter *adapter, u64 *data, + int reg, u32 mask, u32 write) +{ + struct e1000_hw *hw = &adapter->hw; + u32 pat, val; + static const u32 _test[] = + {0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF}; + for (pat = 0; pat < ARRAY_SIZE(_test); pat++) { + E1000_WRITE_REG(hw, reg, (_test[pat] & write)); + val = E1000_READ_REG(hw, reg) & mask; + if (val != (_test[pat] & write & mask)) { + dev_err(pci_dev_to_dev(adapter->pdev), "pattern test reg %04X " + "failed: got 0x%08X expected 0x%08X\n", + E1000_REGISTER(hw, reg), val, (_test[pat] & write & mask)); + *data = E1000_REGISTER(hw, reg); + return 1; + } + } + + return 0; +} + +static bool reg_set_and_check(struct igb_adapter *adapter, u64 *data, + int reg, u32 mask, u32 write) +{ + struct e1000_hw *hw = &adapter->hw; + u32 val; + E1000_WRITE_REG(hw, reg, write & mask); + val = E1000_READ_REG(hw, reg); + if ((write & mask) != (val & mask)) { + dev_err(pci_dev_to_dev(adapter->pdev), "set/check reg %04X test failed:" + " got 0x%08X expected 0x%08X\n", reg, + (val & mask), (write & mask)); + *data = E1000_REGISTER(hw, reg); + return 1; + } + + return 0; +} + +#define REG_PATTERN_TEST(reg, mask, write) \ + do { \ + if (reg_pattern_test(adapter, data, reg, mask, write)) \ + return 1; \ + } while (0) + +#define REG_SET_AND_CHECK(reg, mask, write) \ + do { \ + if (reg_set_and_check(adapter, data, reg, mask, write)) \ + return 1; \ + } while (0) + +static int igb_reg_test(struct igb_adapter *adapter, u64 *data) +{ + struct e1000_hw *hw = &adapter->hw; + struct igb_reg_test *test; + u32 value, before, after; + u32 i, toggle; + + switch (adapter->hw.mac.type) { + case e1000_i350: + case e1000_i354: + test = reg_test_i350; + toggle = 0x7FEFF3FF; + break; + case e1000_i210: + case e1000_i211: + test = reg_test_i210; + toggle = 0x7FEFF3FF; + break; + case e1000_82580: + test = reg_test_82580; + toggle = 0x7FEFF3FF; + break; + case e1000_82576: + test = reg_test_82576; + toggle = 0x7FFFF3FF; + break; + default: + test = reg_test_82575; + toggle = 0x7FFFF3FF; + break; + } + + /* Because the status register is such a special case, + * we handle it separately from the rest of the register + * tests. Some bits are read-only, some toggle, and some + * are writable on newer MACs. + */ + before = E1000_READ_REG(hw, E1000_STATUS); + value = (E1000_READ_REG(hw, E1000_STATUS) & toggle); + E1000_WRITE_REG(hw, E1000_STATUS, toggle); + after = E1000_READ_REG(hw, E1000_STATUS) & toggle; + if (value != after) { + dev_err(pci_dev_to_dev(adapter->pdev), "failed STATUS register test " + "got: 0x%08X expected: 0x%08X\n", after, value); + *data = 1; + return 1; + } + /* restore previous status */ + E1000_WRITE_REG(hw, E1000_STATUS, before); + + /* Perform the remainder of the register test, looping through + * the test table until we either fail or reach the null entry. + */ + while (test->reg) { + for (i = 0; i < test->array_len; i++) { + switch (test->test_type) { + case PATTERN_TEST: + REG_PATTERN_TEST(test->reg + + (i * test->reg_offset), + test->mask, + test->write); + break; + case SET_READ_TEST: + REG_SET_AND_CHECK(test->reg + + (i * test->reg_offset), + test->mask, + test->write); + break; + case WRITE_NO_TEST: + writel(test->write, + (adapter->hw.hw_addr + test->reg) + + (i * test->reg_offset)); + break; + case TABLE32_TEST: + REG_PATTERN_TEST(test->reg + (i * 4), + test->mask, + test->write); + break; + case TABLE64_TEST_LO: + REG_PATTERN_TEST(test->reg + (i * 8), + test->mask, + test->write); + break; + case TABLE64_TEST_HI: + REG_PATTERN_TEST((test->reg + 4) + (i * 8), + test->mask, + test->write); + break; + } + } + test++; + } + + *data = 0; + return 0; +} + +static int igb_eeprom_test(struct igb_adapter *adapter, u64 *data) +{ + *data = 0; + + /* Validate NVM checksum */ + if (e1000_validate_nvm_checksum(&adapter->hw) < 0) + *data = 2; + + return *data; +} + +static irqreturn_t igb_test_intr(int irq, void *data) +{ + struct igb_adapter *adapter = (struct igb_adapter *) data; + struct e1000_hw *hw = &adapter->hw; + + adapter->test_icr |= E1000_READ_REG(hw, E1000_ICR); + + return IRQ_HANDLED; +} + +static int igb_intr_test(struct igb_adapter *adapter, u64 *data) +{ + struct e1000_hw *hw = &adapter->hw; + struct net_device *netdev = adapter->netdev; + u32 mask, ics_mask, i = 0, shared_int = TRUE; + u32 irq = adapter->pdev->irq; + + *data = 0; + + /* Hook up test interrupt handler just for this test */ + if (adapter->msix_entries) { + if (request_irq(adapter->msix_entries[0].vector, + &igb_test_intr, 0, netdev->name, adapter)) { + *data = 1; + return -1; + } + } else if (adapter->flags & IGB_FLAG_HAS_MSI) { + shared_int = FALSE; + if (request_irq(irq, + igb_test_intr, 0, netdev->name, adapter)) { + *data = 1; + return -1; + } + } else if (!request_irq(irq, igb_test_intr, IRQF_PROBE_SHARED, + netdev->name, adapter)) { + shared_int = FALSE; + } else if (request_irq(irq, &igb_test_intr, IRQF_SHARED, + netdev->name, adapter)) { + *data = 1; + return -1; + } + dev_info(pci_dev_to_dev(adapter->pdev), "testing %s interrupt\n", + (shared_int ? "shared" : "unshared")); + + /* Disable all the interrupts */ + E1000_WRITE_REG(hw, E1000_IMC, ~0); + E1000_WRITE_FLUSH(hw); + usleep_range(10000, 20000); + + /* Define all writable bits for ICS */ + switch (hw->mac.type) { + case e1000_82575: + ics_mask = 0x37F47EDD; + break; + case e1000_82576: + ics_mask = 0x77D4FBFD; + break; + case e1000_82580: + ics_mask = 0x77DCFED5; + break; + case e1000_i350: + case e1000_i354: + ics_mask = 0x77DCFED5; + break; + case e1000_i210: + case e1000_i211: + ics_mask = 0x774CFED5; + break; + default: + ics_mask = 0x7FFFFFFF; + break; + } + + /* Test each interrupt */ + for (; i < 31; i++) { + /* Interrupt to test */ + mask = 1 << i; + + if (!(mask & ics_mask)) + continue; + + if (!shared_int) { + /* Disable the interrupt to be reported in + * the cause register and then force the same + * interrupt and see if one gets posted. If + * an interrupt was posted to the bus, the + * test failed. + */ + adapter->test_icr = 0; + + /* Flush any pending interrupts */ + E1000_WRITE_REG(hw, E1000_ICR, ~0); + + E1000_WRITE_REG(hw, E1000_IMC, mask); + E1000_WRITE_REG(hw, E1000_ICS, mask); + E1000_WRITE_FLUSH(hw); + usleep_range(10000, 20000); + + if (adapter->test_icr & mask) { + *data = 3; + break; + } + } + + /* Enable the interrupt to be reported in + * the cause register and then force the same + * interrupt and see if one gets posted. If + * an interrupt was not posted to the bus, the + * test failed. + */ + adapter->test_icr = 0; + + /* Flush any pending interrupts */ + E1000_WRITE_REG(hw, E1000_ICR, ~0); + + E1000_WRITE_REG(hw, E1000_IMS, mask); + E1000_WRITE_REG(hw, E1000_ICS, mask); + E1000_WRITE_FLUSH(hw); + usleep_range(10000, 20000); + + if (!(adapter->test_icr & mask)) { + *data = 4; + break; + } + + if (!shared_int) { + /* Disable the other interrupts to be reported in + * the cause register and then force the other + * interrupts and see if any get posted. If + * an interrupt was posted to the bus, the + * test failed. + */ + adapter->test_icr = 0; + + /* Flush any pending interrupts */ + E1000_WRITE_REG(hw, E1000_ICR, ~0); + + E1000_WRITE_REG(hw, E1000_IMC, ~mask); + E1000_WRITE_REG(hw, E1000_ICS, ~mask); + E1000_WRITE_FLUSH(hw); + usleep_range(10000, 20000); + + if (adapter->test_icr & mask) { + *data = 5; + break; + } + } + } + + /* Disable all the interrupts */ + E1000_WRITE_REG(hw, E1000_IMC, ~0); + E1000_WRITE_FLUSH(hw); + usleep_range(10000, 20000); + + /* Unhook test interrupt handler */ + if (adapter->msix_entries) + free_irq(adapter->msix_entries[0].vector, adapter); + else + free_irq(irq, adapter); + + return *data; +} + +static void igb_free_desc_rings(struct igb_adapter *adapter) +{ + igb_free_tx_resources(&adapter->test_tx_ring); + igb_free_rx_resources(&adapter->test_rx_ring); +} + +static int igb_setup_desc_rings(struct igb_adapter *adapter) +{ + struct igb_ring *tx_ring = &adapter->test_tx_ring; + struct igb_ring *rx_ring = &adapter->test_rx_ring; + struct e1000_hw *hw = &adapter->hw; + int ret_val; + + /* Setup Tx descriptor ring and Tx buffers */ + tx_ring->count = IGB_DEFAULT_TXD; + tx_ring->dev = pci_dev_to_dev(adapter->pdev); + tx_ring->netdev = adapter->netdev; + tx_ring->reg_idx = adapter->vfs_allocated_count; + + if (igb_setup_tx_resources(tx_ring)) { + ret_val = 1; + goto err_nomem; + } + + igb_setup_tctl(adapter); + igb_configure_tx_ring(adapter, tx_ring); + + /* Setup Rx descriptor ring and Rx buffers */ + rx_ring->count = IGB_DEFAULT_RXD; + rx_ring->dev = pci_dev_to_dev(adapter->pdev); + rx_ring->netdev = adapter->netdev; +#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT + rx_ring->rx_buffer_len = IGB_RX_HDR_LEN; +#endif + rx_ring->reg_idx = adapter->vfs_allocated_count; + + if (igb_setup_rx_resources(rx_ring)) { + ret_val = 2; + goto err_nomem; + } + + /* set the default queue to queue 0 of PF */ + E1000_WRITE_REG(hw, E1000_MRQC, adapter->vfs_allocated_count << 3); + + /* enable receive ring */ + igb_setup_rctl(adapter); + igb_configure_rx_ring(adapter, rx_ring); + + igb_alloc_rx_buffers(rx_ring, igb_desc_unused(rx_ring)); + + return 0; + +err_nomem: + igb_free_desc_rings(adapter); + return ret_val; +} + +static void igb_phy_disable_receiver(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + + /* Write out to PHY registers 29 and 30 to disable the Receiver. */ + e1000_write_phy_reg(hw, 29, 0x001F); + e1000_write_phy_reg(hw, 30, 0x8FFC); + e1000_write_phy_reg(hw, 29, 0x001A); + e1000_write_phy_reg(hw, 30, 0x8FF0); +} + +static int igb_integrated_phy_loopback(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 ctrl_reg = 0; + + hw->mac.autoneg = FALSE; + + if (hw->phy.type == e1000_phy_m88) { + if (hw->phy.id != I210_I_PHY_ID) { + /* Auto-MDI/MDIX Off */ + e1000_write_phy_reg(hw, M88E1000_PHY_SPEC_CTRL, 0x0808); + /* reset to update Auto-MDI/MDIX */ + e1000_write_phy_reg(hw, PHY_CONTROL, 0x9140); + /* autoneg off */ + e1000_write_phy_reg(hw, PHY_CONTROL, 0x8140); + } else { + /* force 1000, set loopback */ + e1000_write_phy_reg(hw, I347AT4_PAGE_SELECT, 0); + e1000_write_phy_reg(hw, PHY_CONTROL, 0x4140); + } + } else { + /* enable MII loopback */ + if (hw->phy.type == e1000_phy_82580) + e1000_write_phy_reg(hw, I82577_PHY_LBK_CTRL, 0x8041); + } + + /* force 1000, set loopback */ + e1000_write_phy_reg(hw, PHY_CONTROL, 0x4140); + + /* Now set up the MAC to the same speed/duplex as the PHY. */ + ctrl_reg = E1000_READ_REG(hw, E1000_CTRL); + ctrl_reg &= ~E1000_CTRL_SPD_SEL; /* Clear the speed sel bits */ + ctrl_reg |= (E1000_CTRL_FRCSPD | /* Set the Force Speed Bit */ + E1000_CTRL_FRCDPX | /* Set the Force Duplex Bit */ + E1000_CTRL_SPD_1000 |/* Force Speed to 1000 */ + E1000_CTRL_FD | /* Force Duplex to FULL */ + E1000_CTRL_SLU); /* Set link up enable bit */ + + if (hw->phy.type == e1000_phy_m88) + ctrl_reg |= E1000_CTRL_ILOS; /* Invert Loss of Signal */ + + E1000_WRITE_REG(hw, E1000_CTRL, ctrl_reg); + + /* Disable the receiver on the PHY so when a cable is plugged in, the + * PHY does not begin to autoneg when a cable is reconnected to the NIC. + */ + if (hw->phy.type == e1000_phy_m88) + igb_phy_disable_receiver(adapter); + + mdelay(500); + return 0; +} + +static int igb_set_phy_loopback(struct igb_adapter *adapter) +{ + return igb_integrated_phy_loopback(adapter); +} + +static int igb_setup_loopback_test(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 reg; + + reg = E1000_READ_REG(hw, E1000_CTRL_EXT); + + /* use CTRL_EXT to identify link type as SGMII can appear as copper */ + if (reg & E1000_CTRL_EXT_LINK_MODE_MASK) { + if ((hw->device_id == E1000_DEV_ID_DH89XXCC_SGMII) || + (hw->device_id == E1000_DEV_ID_DH89XXCC_SERDES) || + (hw->device_id == E1000_DEV_ID_DH89XXCC_BACKPLANE) || + (hw->device_id == E1000_DEV_ID_DH89XXCC_SFP)) { + + /* Enable DH89xxCC MPHY for near end loopback */ + reg = E1000_READ_REG(hw, E1000_MPHY_ADDR_CTL); + reg = (reg & E1000_MPHY_ADDR_CTL_OFFSET_MASK) | + E1000_MPHY_PCS_CLK_REG_OFFSET; + E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTL, reg); + + reg = E1000_READ_REG(hw, E1000_MPHY_DATA); + reg |= E1000_MPHY_PCS_CLK_REG_DIGINELBEN; + E1000_WRITE_REG(hw, E1000_MPHY_DATA, reg); + } + + reg = E1000_READ_REG(hw, E1000_RCTL); + reg |= E1000_RCTL_LBM_TCVR; + E1000_WRITE_REG(hw, E1000_RCTL, reg); + + E1000_WRITE_REG(hw, E1000_SCTL, E1000_ENABLE_SERDES_LOOPBACK); + + reg = E1000_READ_REG(hw, E1000_CTRL); + reg &= ~(E1000_CTRL_RFCE | + E1000_CTRL_TFCE | + E1000_CTRL_LRST); + reg |= E1000_CTRL_SLU | + E1000_CTRL_FD; + E1000_WRITE_REG(hw, E1000_CTRL, reg); + + /* Unset switch control to serdes energy detect */ + reg = E1000_READ_REG(hw, E1000_CONNSW); + reg &= ~E1000_CONNSW_ENRGSRC; + E1000_WRITE_REG(hw, E1000_CONNSW, reg); + + /* Unset sigdetect for SERDES loopback on + * 82580 and newer devices + */ + if (hw->mac.type >= e1000_82580) { + reg = E1000_READ_REG(hw, E1000_PCS_CFG0); + reg |= E1000_PCS_CFG_IGN_SD; + E1000_WRITE_REG(hw, E1000_PCS_CFG0, reg); + } + + /* Set PCS register for forced speed */ + reg = E1000_READ_REG(hw, E1000_PCS_LCTL); + reg &= ~E1000_PCS_LCTL_AN_ENABLE; /* Disable Autoneg*/ + reg |= E1000_PCS_LCTL_FLV_LINK_UP | /* Force link up */ + E1000_PCS_LCTL_FSV_1000 | /* Force 1000 */ + E1000_PCS_LCTL_FDV_FULL | /* SerDes Full duplex */ + E1000_PCS_LCTL_FSD | /* Force Speed */ + E1000_PCS_LCTL_FORCE_LINK; /* Force Link */ + E1000_WRITE_REG(hw, E1000_PCS_LCTL, reg); + + return 0; + } + + return igb_set_phy_loopback(adapter); +} + +static void igb_loopback_cleanup(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 rctl; + u16 phy_reg; + + if ((hw->device_id == E1000_DEV_ID_DH89XXCC_SGMII) || + (hw->device_id == E1000_DEV_ID_DH89XXCC_SERDES) || + (hw->device_id == E1000_DEV_ID_DH89XXCC_BACKPLANE) || + (hw->device_id == E1000_DEV_ID_DH89XXCC_SFP)) { + u32 reg; + + /* Disable near end loopback on DH89xxCC */ + reg = E1000_READ_REG(hw, E1000_MPHY_ADDR_CTL); + reg = (reg & E1000_MPHY_ADDR_CTL_OFFSET_MASK ) | + E1000_MPHY_PCS_CLK_REG_OFFSET; + E1000_WRITE_REG(hw, E1000_MPHY_ADDR_CTL, reg); + + reg = E1000_READ_REG(hw, E1000_MPHY_DATA); + reg &= ~E1000_MPHY_PCS_CLK_REG_DIGINELBEN; + E1000_WRITE_REG(hw, E1000_MPHY_DATA, reg); + } + + rctl = E1000_READ_REG(hw, E1000_RCTL); + rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC); + E1000_WRITE_REG(hw, E1000_RCTL, rctl); + + hw->mac.autoneg = TRUE; + e1000_read_phy_reg(hw, PHY_CONTROL, &phy_reg); + if (phy_reg & MII_CR_LOOPBACK) { + phy_reg &= ~MII_CR_LOOPBACK; + if (hw->phy.type == I210_I_PHY_ID) + e1000_write_phy_reg(hw, I347AT4_PAGE_SELECT, 0); + e1000_write_phy_reg(hw, PHY_CONTROL, phy_reg); + e1000_phy_commit(hw); + } +} +static void igb_create_lbtest_frame(struct sk_buff *skb, + unsigned int frame_size) +{ + memset(skb->data, 0xFF, frame_size); + frame_size /= 2; + memset(&skb->data[frame_size], 0xAA, frame_size - 1); + memset(&skb->data[frame_size + 10], 0xBE, 1); + memset(&skb->data[frame_size + 12], 0xAF, 1); +} + +static int igb_check_lbtest_frame(struct igb_rx_buffer *rx_buffer, + unsigned int frame_size) +{ + unsigned char *data; + bool match = true; + + frame_size >>= 1; + +#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT + data = rx_buffer->skb->data; +#else + data = kmap(rx_buffer->page); +#endif + + if (data[3] != 0xFF || + data[frame_size + 10] != 0xBE || + data[frame_size + 12] != 0xAF) + match = false; + +#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT + kunmap(rx_buffer->page); + +#endif + return match; +} + +static u16 igb_clean_test_rings(struct igb_ring *rx_ring, + struct igb_ring *tx_ring, + unsigned int size) +{ + union e1000_adv_rx_desc *rx_desc; + struct igb_rx_buffer *rx_buffer_info; + struct igb_tx_buffer *tx_buffer_info; + u16 rx_ntc, tx_ntc, count = 0; + + /* initialize next to clean and descriptor values */ + rx_ntc = rx_ring->next_to_clean; + tx_ntc = tx_ring->next_to_clean; + rx_desc = IGB_RX_DESC(rx_ring, rx_ntc); + + while (igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) { + /* check rx buffer */ + rx_buffer_info = &rx_ring->rx_buffer_info[rx_ntc]; + + /* sync Rx buffer for CPU read */ + dma_sync_single_for_cpu(rx_ring->dev, + rx_buffer_info->dma, +#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT + IGB_RX_HDR_LEN, +#else + IGB_RX_BUFSZ, +#endif + DMA_FROM_DEVICE); + + /* verify contents of skb */ + if (igb_check_lbtest_frame(rx_buffer_info, size)) + count++; + + /* sync Rx buffer for device write */ + dma_sync_single_for_device(rx_ring->dev, + rx_buffer_info->dma, +#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT + IGB_RX_HDR_LEN, +#else + IGB_RX_BUFSZ, +#endif + DMA_FROM_DEVICE); + + /* unmap buffer on tx side */ + tx_buffer_info = &tx_ring->tx_buffer_info[tx_ntc]; + igb_unmap_and_free_tx_resource(tx_ring, tx_buffer_info); + + /* increment rx/tx next to clean counters */ + rx_ntc++; + if (rx_ntc == rx_ring->count) + rx_ntc = 0; + tx_ntc++; + if (tx_ntc == tx_ring->count) + tx_ntc = 0; + + /* fetch next descriptor */ + rx_desc = IGB_RX_DESC(rx_ring, rx_ntc); + } + + /* re-map buffers to ring, store next to clean values */ + igb_alloc_rx_buffers(rx_ring, count); + rx_ring->next_to_clean = rx_ntc; + tx_ring->next_to_clean = tx_ntc; + + return count; +} + +static int igb_run_loopback_test(struct igb_adapter *adapter) +{ + struct igb_ring *tx_ring = &adapter->test_tx_ring; + struct igb_ring *rx_ring = &adapter->test_rx_ring; + u16 i, j, lc, good_cnt; + int ret_val = 0; + unsigned int size = IGB_RX_HDR_LEN; + netdev_tx_t tx_ret_val; + struct sk_buff *skb; + + /* allocate test skb */ + skb = alloc_skb(size, GFP_KERNEL); + if (!skb) + return 11; + + /* place data into test skb */ + igb_create_lbtest_frame(skb, size); + skb_put(skb, size); + + /* + * Calculate the loop count based on the largest descriptor ring + * The idea is to wrap the largest ring a number of times using 64 + * send/receive pairs during each loop + */ + + if (rx_ring->count <= tx_ring->count) + lc = ((tx_ring->count / 64) * 2) + 1; + else + lc = ((rx_ring->count / 64) * 2) + 1; + + for (j = 0; j <= lc; j++) { /* loop count loop */ + /* reset count of good packets */ + good_cnt = 0; + + /* place 64 packets on the transmit queue*/ + for (i = 0; i < 64; i++) { + skb_get(skb); + tx_ret_val = igb_xmit_frame_ring(skb, tx_ring); + if (tx_ret_val == NETDEV_TX_OK) + good_cnt++; + } + + if (good_cnt != 64) { + ret_val = 12; + break; + } + + /* allow 200 milliseconds for packets to go from tx to rx */ + msleep(200); + + good_cnt = igb_clean_test_rings(rx_ring, tx_ring, size); + if (good_cnt != 64) { + ret_val = 13; + break; + } + } /* end loop count loop */ + + /* free the original skb */ + kfree_skb(skb); + + return ret_val; +} + +static int igb_loopback_test(struct igb_adapter *adapter, u64 *data) +{ + /* PHY loopback cannot be performed if SoL/IDER + * sessions are active */ + if (e1000_check_reset_block(&adapter->hw)) { + dev_err(pci_dev_to_dev(adapter->pdev), + "Cannot do PHY loopback test " + "when SoL/IDER is active.\n"); + *data = 0; + goto out; + } + if (adapter->hw.mac.type == e1000_i354) { + dev_info(&adapter->pdev->dev, + "Loopback test not supported on i354.\n"); + *data = 0; + goto out; + } + *data = igb_setup_desc_rings(adapter); + if (*data) + goto out; + *data = igb_setup_loopback_test(adapter); + if (*data) + goto err_loopback; + *data = igb_run_loopback_test(adapter); + + igb_loopback_cleanup(adapter); + +err_loopback: + igb_free_desc_rings(adapter); +out: + return *data; +} + +static int igb_link_test(struct igb_adapter *adapter, u64 *data) +{ + u32 link; + int i, time; + + *data = 0; + time = 0; + if (adapter->hw.phy.media_type == e1000_media_type_internal_serdes) { + int i = 0; + adapter->hw.mac.serdes_has_link = FALSE; + + /* On some blade server designs, link establishment + * could take as long as 2-3 minutes */ + do { + e1000_check_for_link(&adapter->hw); + if (adapter->hw.mac.serdes_has_link) + goto out; + msleep(20); + } while (i++ < 3750); + + *data = 1; + } else { + for (i=0; i < IGB_MAX_LINK_TRIES; i++) { + link = igb_has_link(adapter); + if (link) + goto out; + else { + time++; + msleep(1000); + } + } + if (!link) + *data = 1; + } + out: + return *data; +} + +static void igb_diag_test(struct net_device *netdev, + struct ethtool_test *eth_test, u64 *data) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + u16 autoneg_advertised; + u8 forced_speed_duplex, autoneg; + bool if_running = netif_running(netdev); + + set_bit(__IGB_TESTING, &adapter->state); + if (eth_test->flags == ETH_TEST_FL_OFFLINE) { + /* Offline tests */ + + /* save speed, duplex, autoneg settings */ + autoneg_advertised = adapter->hw.phy.autoneg_advertised; + forced_speed_duplex = adapter->hw.mac.forced_speed_duplex; + autoneg = adapter->hw.mac.autoneg; + + dev_info(pci_dev_to_dev(adapter->pdev), "offline testing starting\n"); + + /* power up link for link test */ + igb_power_up_link(adapter); + + /* Link test performed before hardware reset so autoneg doesn't + * interfere with test result */ + if (igb_link_test(adapter, &data[4])) + eth_test->flags |= ETH_TEST_FL_FAILED; + + if (if_running) + /* indicate we're in test mode */ + dev_close(netdev); + else + igb_reset(adapter); + + if (igb_reg_test(adapter, &data[0])) + eth_test->flags |= ETH_TEST_FL_FAILED; + + igb_reset(adapter); + if (igb_eeprom_test(adapter, &data[1])) + eth_test->flags |= ETH_TEST_FL_FAILED; + + igb_reset(adapter); + if (igb_intr_test(adapter, &data[2])) + eth_test->flags |= ETH_TEST_FL_FAILED; + + igb_reset(adapter); + + /* power up link for loopback test */ + igb_power_up_link(adapter); + + if (igb_loopback_test(adapter, &data[3])) + eth_test->flags |= ETH_TEST_FL_FAILED; + + /* restore speed, duplex, autoneg settings */ + adapter->hw.phy.autoneg_advertised = autoneg_advertised; + adapter->hw.mac.forced_speed_duplex = forced_speed_duplex; + adapter->hw.mac.autoneg = autoneg; + + /* force this routine to wait until autoneg complete/timeout */ + adapter->hw.phy.autoneg_wait_to_complete = TRUE; + igb_reset(adapter); + adapter->hw.phy.autoneg_wait_to_complete = FALSE; + + clear_bit(__IGB_TESTING, &adapter->state); + if (if_running) + dev_open(netdev); + } else { + dev_info(pci_dev_to_dev(adapter->pdev), "online testing starting\n"); + + /* PHY is powered down when interface is down */ + if (if_running && igb_link_test(adapter, &data[4])) + eth_test->flags |= ETH_TEST_FL_FAILED; + else + data[4] = 0; + + /* Online tests aren't run; pass by default */ + data[0] = 0; + data[1] = 0; + data[2] = 0; + data[3] = 0; + + clear_bit(__IGB_TESTING, &adapter->state); + } + msleep_interruptible(4 * 1000); +} + +static void igb_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + + wol->supported = WAKE_UCAST | WAKE_MCAST | + WAKE_BCAST | WAKE_MAGIC | + WAKE_PHY; + wol->wolopts = 0; + + if (!(adapter->flags & IGB_FLAG_WOL_SUPPORTED)) + return; + + /* apply any specific unsupported masks here */ + switch (adapter->hw.device_id) { + default: + break; + } + + if (adapter->wol & E1000_WUFC_EX) + wol->wolopts |= WAKE_UCAST; + if (adapter->wol & E1000_WUFC_MC) + wol->wolopts |= WAKE_MCAST; + if (adapter->wol & E1000_WUFC_BC) + wol->wolopts |= WAKE_BCAST; + if (adapter->wol & E1000_WUFC_MAG) + wol->wolopts |= WAKE_MAGIC; + if (adapter->wol & E1000_WUFC_LNKC) + wol->wolopts |= WAKE_PHY; +} + +static int igb_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + + if (wol->wolopts & (WAKE_ARP | WAKE_MAGICSECURE)) + return -EOPNOTSUPP; + + if (!(adapter->flags & IGB_FLAG_WOL_SUPPORTED)) + return wol->wolopts ? -EOPNOTSUPP : 0; + + /* these settings will always override what we currently have */ + adapter->wol = 0; + + if (wol->wolopts & WAKE_UCAST) + adapter->wol |= E1000_WUFC_EX; + if (wol->wolopts & WAKE_MCAST) + adapter->wol |= E1000_WUFC_MC; + if (wol->wolopts & WAKE_BCAST) + adapter->wol |= E1000_WUFC_BC; + if (wol->wolopts & WAKE_MAGIC) + adapter->wol |= E1000_WUFC_MAG; + if (wol->wolopts & WAKE_PHY) + adapter->wol |= E1000_WUFC_LNKC; + device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol); + + return 0; +} + +/* bit defines for adapter->led_status */ +#ifdef HAVE_ETHTOOL_SET_PHYS_ID +static int igb_set_phys_id(struct net_device *netdev, + enum ethtool_phys_id_state state) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + + switch (state) { + case ETHTOOL_ID_ACTIVE: + e1000_blink_led(hw); + return 2; + case ETHTOOL_ID_ON: + e1000_led_on(hw); + break; + case ETHTOOL_ID_OFF: + e1000_led_off(hw); + break; + case ETHTOOL_ID_INACTIVE: + e1000_led_off(hw); + e1000_cleanup_led(hw); + break; + } + + return 0; +} +#else +static int igb_phys_id(struct net_device *netdev, u32 data) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + unsigned long timeout; + + timeout = data * 1000; + + /* + * msleep_interruptable only accepts unsigned int so we are limited + * in how long a duration we can wait + */ + if (!timeout || timeout > UINT_MAX) + timeout = UINT_MAX; + + e1000_blink_led(hw); + msleep_interruptible(timeout); + + e1000_led_off(hw); + e1000_cleanup_led(hw); + + return 0; +} +#endif /* HAVE_ETHTOOL_SET_PHYS_ID */ + +static int igb_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + int i; + + if ((ec->rx_coalesce_usecs > IGB_MAX_ITR_USECS) || + ((ec->rx_coalesce_usecs > 3) && + (ec->rx_coalesce_usecs < IGB_MIN_ITR_USECS)) || + (ec->rx_coalesce_usecs == 2)) + { + printk("set_coalesce:invalid parameter.."); + return -EINVAL; + } + + if ((ec->tx_coalesce_usecs > IGB_MAX_ITR_USECS) || + ((ec->tx_coalesce_usecs > 3) && + (ec->tx_coalesce_usecs < IGB_MIN_ITR_USECS)) || + (ec->tx_coalesce_usecs == 2)) + return -EINVAL; + + if ((adapter->flags & IGB_FLAG_QUEUE_PAIRS) && ec->tx_coalesce_usecs) + return -EINVAL; + + if (ec->tx_max_coalesced_frames_irq) + adapter->tx_work_limit = ec->tx_max_coalesced_frames_irq; + + /* If ITR is disabled, disable DMAC */ + if (ec->rx_coalesce_usecs == 0) { + adapter->dmac = IGB_DMAC_DISABLE; + } + + /* convert to rate of irq's per second */ + if (ec->rx_coalesce_usecs && ec->rx_coalesce_usecs <= 3) + adapter->rx_itr_setting = ec->rx_coalesce_usecs; + else + adapter->rx_itr_setting = ec->rx_coalesce_usecs << 2; + + /* convert to rate of irq's per second */ + if (adapter->flags & IGB_FLAG_QUEUE_PAIRS) + adapter->tx_itr_setting = adapter->rx_itr_setting; + else if (ec->tx_coalesce_usecs && ec->tx_coalesce_usecs <= 3) + adapter->tx_itr_setting = ec->tx_coalesce_usecs; + else + adapter->tx_itr_setting = ec->tx_coalesce_usecs << 2; + + for (i = 0; i < adapter->num_q_vectors; i++) { + struct igb_q_vector *q_vector = adapter->q_vector[i]; + q_vector->tx.work_limit = adapter->tx_work_limit; + if (q_vector->rx.ring) + q_vector->itr_val = adapter->rx_itr_setting; + else + q_vector->itr_val = adapter->tx_itr_setting; + if (q_vector->itr_val && q_vector->itr_val <= 3) + q_vector->itr_val = IGB_START_ITR; + q_vector->set_itr = 1; + } + + return 0; +} + +static int igb_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + + if (adapter->rx_itr_setting <= 3) + ec->rx_coalesce_usecs = adapter->rx_itr_setting; + else + ec->rx_coalesce_usecs = adapter->rx_itr_setting >> 2; + + ec->tx_max_coalesced_frames_irq = adapter->tx_work_limit; + + if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS)) { + if (adapter->tx_itr_setting <= 3) + ec->tx_coalesce_usecs = adapter->tx_itr_setting; + else + ec->tx_coalesce_usecs = adapter->tx_itr_setting >> 2; + } + + return 0; +} + +static int igb_nway_reset(struct net_device *netdev) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + if (netif_running(netdev)) + igb_reinit_locked(adapter); + return 0; +} + +#ifdef HAVE_ETHTOOL_GET_SSET_COUNT +static int igb_get_sset_count(struct net_device *netdev, int sset) +{ + switch (sset) { + case ETH_SS_STATS: + return IGB_STATS_LEN; + case ETH_SS_TEST: + return IGB_TEST_LEN; + default: + return -ENOTSUPP; + } +} +#else +static int igb_get_stats_count(struct net_device *netdev) +{ + return IGB_STATS_LEN; +} + +static int igb_diag_test_count(struct net_device *netdev) +{ + return IGB_TEST_LEN; +} +#endif + +static void igb_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats *stats, u64 *data) +{ + struct igb_adapter *adapter = netdev_priv(netdev); +#ifdef HAVE_NETDEV_STATS_IN_NETDEV + struct net_device_stats *net_stats = &netdev->stats; +#else + struct net_device_stats *net_stats = &adapter->net_stats; +#endif + u64 *queue_stat; + int i, j, k; + char *p; + + igb_update_stats(adapter); + + for (i = 0; i < IGB_GLOBAL_STATS_LEN; i++) { + p = (char *)adapter + igb_gstrings_stats[i].stat_offset; + data[i] = (igb_gstrings_stats[i].sizeof_stat == + sizeof(u64)) ? *(u64 *)p : *(u32 *)p; + } + for (j = 0; j < IGB_NETDEV_STATS_LEN; j++, i++) { + p = (char *)net_stats + igb_gstrings_net_stats[j].stat_offset; + data[i] = (igb_gstrings_net_stats[j].sizeof_stat == + sizeof(u64)) ? *(u64 *)p : *(u32 *)p; + } + for (j = 0; j < adapter->num_tx_queues; j++) { + queue_stat = (u64 *)&adapter->tx_ring[j]->tx_stats; + for (k = 0; k < IGB_TX_QUEUE_STATS_LEN; k++, i++) + data[i] = queue_stat[k]; + } + for (j = 0; j < adapter->num_rx_queues; j++) { + queue_stat = (u64 *)&adapter->rx_ring[j]->rx_stats; + for (k = 0; k < IGB_RX_QUEUE_STATS_LEN; k++, i++) + data[i] = queue_stat[k]; + } +} + +static void igb_get_strings(struct net_device *netdev, u32 stringset, u8 *data) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + u8 *p = data; + int i; + + switch (stringset) { + case ETH_SS_TEST: + memcpy(data, *igb_gstrings_test, + IGB_TEST_LEN*ETH_GSTRING_LEN); + break; + case ETH_SS_STATS: + for (i = 0; i < IGB_GLOBAL_STATS_LEN; i++) { + memcpy(p, igb_gstrings_stats[i].stat_string, + ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } + for (i = 0; i < IGB_NETDEV_STATS_LEN; i++) { + memcpy(p, igb_gstrings_net_stats[i].stat_string, + ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } + for (i = 0; i < adapter->num_tx_queues; i++) { + sprintf(p, "tx_queue_%u_packets", i); + p += ETH_GSTRING_LEN; + sprintf(p, "tx_queue_%u_bytes", i); + p += ETH_GSTRING_LEN; + sprintf(p, "tx_queue_%u_restart", i); + p += ETH_GSTRING_LEN; + } + for (i = 0; i < adapter->num_rx_queues; i++) { + sprintf(p, "rx_queue_%u_packets", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_queue_%u_bytes", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_queue_%u_drops", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_queue_%u_csum_err", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_queue_%u_alloc_failed", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_queue_%u_ipv4_packets", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_queue_%u_ipv4e_packets", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_queue_%u_ipv6_packets", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_queue_%u_ipv6e_packets", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_queue_%u_tcp_packets", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_queue_%u_udp_packets", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_queue_%u_sctp_packets", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_queue_%u_nfs_packets", i); + p += ETH_GSTRING_LEN; + } +/* BUG_ON(p - data != IGB_STATS_LEN * ETH_GSTRING_LEN); */ + break; + } +} + +#ifdef HAVE_ETHTOOL_GET_TS_INFO +static int igb_get_ts_info(struct net_device *dev, + struct ethtool_ts_info *info) +{ + struct igb_adapter *adapter = netdev_priv(dev); + + switch (adapter->hw.mac.type) { +#ifdef HAVE_PTP_1588_CLOCK + case e1000_82575: + info->so_timestamping = + SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE; + return 0; + case e1000_82576: + case e1000_82580: + case e1000_i350: + case e1000_i354: + case e1000_i210: + case e1000_i211: + info->so_timestamping = + SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_RX_SOFTWARE | + SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_TX_HARDWARE | + SOF_TIMESTAMPING_RX_HARDWARE | + SOF_TIMESTAMPING_RAW_HARDWARE; + + if (adapter->ptp_clock) + info->phc_index = ptp_clock_index(adapter->ptp_clock); + else + info->phc_index = -1; + + info->tx_types = + (1 << HWTSTAMP_TX_OFF) | + (1 << HWTSTAMP_TX_ON); + + info->rx_filters = 1 << HWTSTAMP_FILTER_NONE; + + /* 82576 does not support timestamping all packets. */ + if (adapter->hw.mac.type >= e1000_82580) + info->rx_filters |= 1 << HWTSTAMP_FILTER_ALL; + else + info->rx_filters |= + (1 << HWTSTAMP_FILTER_PTP_V1_L4_SYNC) | + (1 << HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ) | + (1 << HWTSTAMP_FILTER_PTP_V2_L2_SYNC) | + (1 << HWTSTAMP_FILTER_PTP_V2_L4_SYNC) | + (1 << HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ) | + (1 << HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ) | + (1 << HWTSTAMP_FILTER_PTP_V2_EVENT); + + return 0; +#endif /* HAVE_PTP_1588_CLOCK */ + default: + return -EOPNOTSUPP; + } +} +#endif /* HAVE_ETHTOOL_GET_TS_INFO */ + +#ifdef CONFIG_PM_RUNTIME +static int igb_ethtool_begin(struct net_device *netdev) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + + pm_runtime_get_sync(&adapter->pdev->dev); + + return 0; +} + +static void igb_ethtool_complete(struct net_device *netdev) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + + pm_runtime_put(&adapter->pdev->dev); +} +#endif /* CONFIG_PM_RUNTIME */ + +#ifndef HAVE_NDO_SET_FEATURES +static u32 igb_get_rx_csum(struct net_device *netdev) +{ + return !!(netdev->features & NETIF_F_RXCSUM); +} + +static int igb_set_rx_csum(struct net_device *netdev, u32 data) +{ + const u32 feature_list = NETIF_F_RXCSUM; + + if (data) + netdev->features |= feature_list; + else + netdev->features &= ~feature_list; + + return 0; +} + +static int igb_set_tx_csum(struct net_device *netdev, u32 data) +{ + struct igb_adapter *adapter = netdev_priv(netdev); +#ifdef NETIF_F_IPV6_CSUM + u32 feature_list = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; +#else + u32 feature_list = NETIF_F_IP_CSUM; +#endif + + if (adapter->hw.mac.type >= e1000_82576) + feature_list |= NETIF_F_SCTP_CSUM; + + if (data) + netdev->features |= feature_list; + else + netdev->features &= ~feature_list; + + return 0; +} + +#ifdef NETIF_F_TSO +static int igb_set_tso(struct net_device *netdev, u32 data) +{ +#ifdef NETIF_F_TSO6 + const u32 feature_list = NETIF_F_TSO | NETIF_F_TSO6; +#else + const u32 feature_list = NETIF_F_TSO; +#endif + + if (data) + netdev->features |= feature_list; + else + netdev->features &= ~feature_list; + +#ifndef HAVE_NETDEV_VLAN_FEATURES + if (!data) { + struct igb_adapter *adapter = netdev_priv(netdev); + struct net_device *v_netdev; + int i; + + /* disable TSO on all VLANs if they're present */ + if (!adapter->vlgrp) + goto tso_out; + + for (i = 0; i < VLAN_GROUP_ARRAY_LEN; i++) { + v_netdev = vlan_group_get_device(adapter->vlgrp, i); + if (!v_netdev) + continue; + + v_netdev->features &= ~feature_list; + vlan_group_set_device(adapter->vlgrp, i, v_netdev); + } + } + +tso_out: + +#endif /* HAVE_NETDEV_VLAN_FEATURES */ + return 0; +} + +#endif /* NETIF_F_TSO */ +#ifdef ETHTOOL_GFLAGS +static int igb_set_flags(struct net_device *netdev, u32 data) +{ + u32 supported_flags = ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN | + ETH_FLAG_RXHASH; +#ifndef HAVE_VLAN_RX_REGISTER + u32 changed = netdev->features ^ data; +#endif + int rc; +#ifndef IGB_NO_LRO + + supported_flags |= ETH_FLAG_LRO; +#endif + /* + * Since there is no support for separate tx vlan accel + * enabled make sure tx flag is cleared if rx is. + */ + if (!(data & ETH_FLAG_RXVLAN)) + data &= ~ETH_FLAG_TXVLAN; + + rc = ethtool_op_set_flags(netdev, data, supported_flags); + if (rc) + return rc; +#ifndef HAVE_VLAN_RX_REGISTER + + if (changed & ETH_FLAG_RXVLAN) + igb_vlan_mode(netdev, data); +#endif + + return 0; +} + +#endif /* ETHTOOL_GFLAGS */ +#endif /* HAVE_NDO_SET_FEATURES */ +#ifdef ETHTOOL_SADV_COAL +static int igb_set_adv_coal(struct net_device *netdev, struct ethtool_value *edata) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + + switch (edata->data) { + case IGB_DMAC_DISABLE: + adapter->dmac = edata->data; + break; + case IGB_DMAC_MIN: + adapter->dmac = edata->data; + break; + case IGB_DMAC_500: + adapter->dmac = edata->data; + break; + case IGB_DMAC_EN_DEFAULT: + adapter->dmac = edata->data; + break; + case IGB_DMAC_2000: + adapter->dmac = edata->data; + break; + case IGB_DMAC_3000: + adapter->dmac = edata->data; + break; + case IGB_DMAC_4000: + adapter->dmac = edata->data; + break; + case IGB_DMAC_5000: + adapter->dmac = edata->data; + break; + case IGB_DMAC_6000: + adapter->dmac = edata->data; + break; + case IGB_DMAC_7000: + adapter->dmac = edata->data; + break; + case IGB_DMAC_8000: + adapter->dmac = edata->data; + break; + case IGB_DMAC_9000: + adapter->dmac = edata->data; + break; + case IGB_DMAC_MAX: + adapter->dmac = edata->data; + break; + default: + adapter->dmac = IGB_DMAC_DISABLE; + printk("set_dmac: invalid setting, setting DMAC to %d\n", + adapter->dmac); + } + printk("%s: setting DMAC to %d\n", netdev->name, adapter->dmac); + return 0; +} +#endif /* ETHTOOL_SADV_COAL */ +#ifdef ETHTOOL_GADV_COAL +static void igb_get_dmac(struct net_device *netdev, + struct ethtool_value *edata) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + edata->data = adapter->dmac; + + return; +} +#endif + +#ifdef ETHTOOL_GEEE +static int igb_get_eee(struct net_device *netdev, struct ethtool_eee *edata) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + u32 ret_val; + u16 phy_data; + + if ((hw->mac.type < e1000_i350) || + (hw->phy.media_type != e1000_media_type_copper)) + return -EOPNOTSUPP; + + edata->supported = (SUPPORTED_1000baseT_Full | + SUPPORTED_100baseT_Full); + + if (!hw->dev_spec._82575.eee_disable) + edata->advertised = + mmd_eee_adv_to_ethtool_adv_t(adapter->eee_advert); + + /* The IPCNFG and EEER registers are not supported on I354. */ + if (hw->mac.type == e1000_i354) { + e1000_get_eee_status_i354(hw, (bool *)&edata->eee_active); + } else { + u32 eeer; + + eeer = E1000_READ_REG(hw, E1000_EEER); + + /* EEE status on negotiated link */ + if (eeer & E1000_EEER_EEE_NEG) + edata->eee_active = true; + + if (eeer & E1000_EEER_TX_LPI_EN) + edata->tx_lpi_enabled = true; + } + + /* EEE Link Partner Advertised */ + switch (hw->mac.type) { + case e1000_i350: + ret_val = e1000_read_emi_reg(hw, E1000_EEE_LP_ADV_ADDR_I350, + &phy_data); + if (ret_val) + return -ENODATA; + + edata->lp_advertised = mmd_eee_adv_to_ethtool_adv_t(phy_data); + + break; + case e1000_i354: + case e1000_i210: + case e1000_i211: + ret_val = e1000_read_xmdio_reg(hw, E1000_EEE_LP_ADV_ADDR_I210, + E1000_EEE_LP_ADV_DEV_I210, + &phy_data); + if (ret_val) + return -ENODATA; + + edata->lp_advertised = mmd_eee_adv_to_ethtool_adv_t(phy_data); + + break; + default: + break; + } + + edata->eee_enabled = !hw->dev_spec._82575.eee_disable; + + if ((hw->mac.type == e1000_i354) && + (edata->eee_enabled)) + edata->tx_lpi_enabled = true; + + /* + * report correct negotiated EEE status for devices that + * wrongly report EEE at half-duplex + */ + if (adapter->link_duplex == HALF_DUPLEX) { + edata->eee_enabled = false; + edata->eee_active = false; + edata->tx_lpi_enabled = false; + edata->advertised &= ~edata->advertised; + } + + return 0; +} +#endif + +#ifdef ETHTOOL_SEEE +static int igb_set_eee(struct net_device *netdev, + struct ethtool_eee *edata) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + struct ethtool_eee eee_curr; + s32 ret_val; + + if ((hw->mac.type < e1000_i350) || + (hw->phy.media_type != e1000_media_type_copper)) + return -EOPNOTSUPP; + + ret_val = igb_get_eee(netdev, &eee_curr); + if (ret_val) + return ret_val; + + if (eee_curr.eee_enabled) { + if (eee_curr.tx_lpi_enabled != edata->tx_lpi_enabled) { + dev_err(pci_dev_to_dev(adapter->pdev), + "Setting EEE tx-lpi is not supported\n"); + return -EINVAL; + } + + /* Tx LPI time is not implemented currently */ + if (edata->tx_lpi_timer) { + dev_err(pci_dev_to_dev(adapter->pdev), + "Setting EEE Tx LPI timer is not supported\n"); + return -EINVAL; + } + + if (edata->advertised & + ~(ADVERTISE_100_FULL | ADVERTISE_1000_FULL)) { + dev_err(pci_dev_to_dev(adapter->pdev), + "EEE Advertisement supports only 100Tx and or 100T full duplex\n"); + return -EINVAL; + } + + } else if (!edata->eee_enabled) { + dev_err(pci_dev_to_dev(adapter->pdev), + "Setting EEE options is not supported with EEE disabled\n"); + return -EINVAL; + } + + adapter->eee_advert = ethtool_adv_to_mmd_eee_adv_t(edata->advertised); + + if (hw->dev_spec._82575.eee_disable != !edata->eee_enabled) { + hw->dev_spec._82575.eee_disable = !edata->eee_enabled; + + /* reset link */ + if (netif_running(netdev)) + igb_reinit_locked(adapter); + else + igb_reset(adapter); + } + + return 0; +} +#endif /* ETHTOOL_SEEE */ + +#ifdef ETHTOOL_GRXRINGS +static int igb_get_rss_hash_opts(struct igb_adapter *adapter, + struct ethtool_rxnfc *cmd) +{ + cmd->data = 0; + + /* Report default options for RSS on igb */ + switch (cmd->flow_type) { + case TCP_V4_FLOW: + cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + case UDP_V4_FLOW: + if (adapter->flags & IGB_FLAG_RSS_FIELD_IPV4_UDP) + cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + case SCTP_V4_FLOW: + case AH_ESP_V4_FLOW: + case AH_V4_FLOW: + case ESP_V4_FLOW: + case IPV4_FLOW: + cmd->data |= RXH_IP_SRC | RXH_IP_DST; + break; + case TCP_V6_FLOW: + cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + case UDP_V6_FLOW: + if (adapter->flags & IGB_FLAG_RSS_FIELD_IPV6_UDP) + cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + case SCTP_V6_FLOW: + case AH_ESP_V6_FLOW: + case AH_V6_FLOW: + case ESP_V6_FLOW: + case IPV6_FLOW: + cmd->data |= RXH_IP_SRC | RXH_IP_DST; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int igb_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd, +#ifdef HAVE_ETHTOOL_GET_RXNFC_VOID_RULE_LOCS + void *rule_locs) +#else + u32 *rule_locs) +#endif +{ + struct igb_adapter *adapter = netdev_priv(dev); + int ret = -EOPNOTSUPP; + + switch (cmd->cmd) { + case ETHTOOL_GRXRINGS: + cmd->data = adapter->num_rx_queues; + ret = 0; + break; + case ETHTOOL_GRXFH: + ret = igb_get_rss_hash_opts(adapter, cmd); + break; + default: + break; + } + + return ret; +} + +#define UDP_RSS_FLAGS (IGB_FLAG_RSS_FIELD_IPV4_UDP | \ + IGB_FLAG_RSS_FIELD_IPV6_UDP) +static int igb_set_rss_hash_opt(struct igb_adapter *adapter, + struct ethtool_rxnfc *nfc) +{ + u32 flags = adapter->flags; + + /* + * RSS does not support anything other than hashing + * to queues on src and dst IPs and ports + */ + if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST | + RXH_L4_B_0_1 | RXH_L4_B_2_3)) + return -EINVAL; + + switch (nfc->flow_type) { + case TCP_V4_FLOW: + case TCP_V6_FLOW: + if (!(nfc->data & RXH_IP_SRC) || + !(nfc->data & RXH_IP_DST) || + !(nfc->data & RXH_L4_B_0_1) || + !(nfc->data & RXH_L4_B_2_3)) + return -EINVAL; + break; + case UDP_V4_FLOW: + if (!(nfc->data & RXH_IP_SRC) || + !(nfc->data & RXH_IP_DST)) + return -EINVAL; + switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { + case 0: + flags &= ~IGB_FLAG_RSS_FIELD_IPV4_UDP; + break; + case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + flags |= IGB_FLAG_RSS_FIELD_IPV4_UDP; + break; + default: + return -EINVAL; + } + break; + case UDP_V6_FLOW: + if (!(nfc->data & RXH_IP_SRC) || + !(nfc->data & RXH_IP_DST)) + return -EINVAL; + switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { + case 0: + flags &= ~IGB_FLAG_RSS_FIELD_IPV6_UDP; + break; + case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + flags |= IGB_FLAG_RSS_FIELD_IPV6_UDP; + break; + default: + return -EINVAL; + } + break; + case AH_ESP_V4_FLOW: + case AH_V4_FLOW: + case ESP_V4_FLOW: + case SCTP_V4_FLOW: + case AH_ESP_V6_FLOW: + case AH_V6_FLOW: + case ESP_V6_FLOW: + case SCTP_V6_FLOW: + if (!(nfc->data & RXH_IP_SRC) || + !(nfc->data & RXH_IP_DST) || + (nfc->data & RXH_L4_B_0_1) || + (nfc->data & RXH_L4_B_2_3)) + return -EINVAL; + break; + default: + return -EINVAL; + } + + /* if we changed something we need to update flags */ + if (flags != adapter->flags) { + struct e1000_hw *hw = &adapter->hw; + u32 mrqc = E1000_READ_REG(hw, E1000_MRQC); + + if ((flags & UDP_RSS_FLAGS) && + !(adapter->flags & UDP_RSS_FLAGS)) + DPRINTK(DRV, WARNING, + "enabling UDP RSS: fragmented packets may arrive out of order to the stack above\n"); + + adapter->flags = flags; + + /* Perform hash on these packet types */ + mrqc |= E1000_MRQC_RSS_FIELD_IPV4 | + E1000_MRQC_RSS_FIELD_IPV4_TCP | + E1000_MRQC_RSS_FIELD_IPV6 | + E1000_MRQC_RSS_FIELD_IPV6_TCP; + + mrqc &= ~(E1000_MRQC_RSS_FIELD_IPV4_UDP | + E1000_MRQC_RSS_FIELD_IPV6_UDP); + + if (flags & IGB_FLAG_RSS_FIELD_IPV4_UDP) + mrqc |= E1000_MRQC_RSS_FIELD_IPV4_UDP; + + if (flags & IGB_FLAG_RSS_FIELD_IPV6_UDP) + mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP; + + E1000_WRITE_REG(hw, E1000_MRQC, mrqc); + } + + return 0; +} + +static int igb_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) +{ + struct igb_adapter *adapter = netdev_priv(dev); + int ret = -EOPNOTSUPP; + + switch (cmd->cmd) { + case ETHTOOL_SRXFH: + ret = igb_set_rss_hash_opt(adapter, cmd); + break; + default: + break; + } + + return ret; +} +#endif /* ETHTOOL_GRXRINGS */ + +static const struct ethtool_ops igb_ethtool_ops = { + .get_settings = igb_get_settings, + .set_settings = igb_set_settings, + .get_drvinfo = igb_get_drvinfo, + .get_regs_len = igb_get_regs_len, + .get_regs = igb_get_regs, + .get_wol = igb_get_wol, + .set_wol = igb_set_wol, + .get_msglevel = igb_get_msglevel, + .set_msglevel = igb_set_msglevel, + .nway_reset = igb_nway_reset, + .get_link = igb_get_link, + .get_eeprom_len = igb_get_eeprom_len, + .get_eeprom = igb_get_eeprom, + .set_eeprom = igb_set_eeprom, + .get_ringparam = igb_get_ringparam, + .set_ringparam = igb_set_ringparam, + .get_pauseparam = igb_get_pauseparam, + .set_pauseparam = igb_set_pauseparam, + .self_test = igb_diag_test, + .get_strings = igb_get_strings, +#ifndef HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT +#ifdef HAVE_ETHTOOL_SET_PHYS_ID + .set_phys_id = igb_set_phys_id, +#else + .phys_id = igb_phys_id, +#endif /* HAVE_ETHTOOL_SET_PHYS_ID */ +#endif /* HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT */ +#ifdef HAVE_ETHTOOL_GET_SSET_COUNT + .get_sset_count = igb_get_sset_count, +#else + .get_stats_count = igb_get_stats_count, + .self_test_count = igb_diag_test_count, +#endif + .get_ethtool_stats = igb_get_ethtool_stats, +#ifdef HAVE_ETHTOOL_GET_PERM_ADDR + .get_perm_addr = ethtool_op_get_perm_addr, +#endif + .get_coalesce = igb_get_coalesce, + .set_coalesce = igb_set_coalesce, +#ifndef HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT +#ifdef HAVE_ETHTOOL_GET_TS_INFO + .get_ts_info = igb_get_ts_info, +#endif /* HAVE_ETHTOOL_GET_TS_INFO */ +#endif /* HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT */ +#ifdef CONFIG_PM_RUNTIME + .begin = igb_ethtool_begin, + .complete = igb_ethtool_complete, +#endif /* CONFIG_PM_RUNTIME */ +#ifndef HAVE_NDO_SET_FEATURES + .get_rx_csum = igb_get_rx_csum, + .set_rx_csum = igb_set_rx_csum, + .get_tx_csum = ethtool_op_get_tx_csum, + .set_tx_csum = igb_set_tx_csum, + .get_sg = ethtool_op_get_sg, + .set_sg = ethtool_op_set_sg, +#ifdef NETIF_F_TSO + .get_tso = ethtool_op_get_tso, + .set_tso = igb_set_tso, +#endif +#ifdef ETHTOOL_GFLAGS + .get_flags = ethtool_op_get_flags, + .set_flags = igb_set_flags, +#endif /* ETHTOOL_GFLAGS */ +#endif /* HAVE_NDO_SET_FEATURES */ +#ifdef ETHTOOL_GADV_COAL + .get_advcoal = igb_get_adv_coal, + .set_advcoal = igb_set_dmac_coal, +#endif /* ETHTOOL_GADV_COAL */ +#ifndef HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT +#ifdef ETHTOOL_GEEE + .get_eee = igb_get_eee, +#endif +#ifdef ETHTOOL_SEEE + .set_eee = igb_set_eee, +#endif +#endif /* HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT */ +#ifdef ETHTOOL_GRXRINGS + .get_rxnfc = igb_get_rxnfc, + .set_rxnfc = igb_set_rxnfc, +#endif +}; + +#ifdef HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT +static const struct ethtool_ops_ext igb_ethtool_ops_ext = { + .size = sizeof(struct ethtool_ops_ext), + .get_ts_info = igb_get_ts_info, + .set_phys_id = igb_set_phys_id, + .get_eee = igb_get_eee, + .set_eee = igb_set_eee, +}; + +void igb_set_ethtool_ops(struct net_device *netdev) +{ + SET_ETHTOOL_OPS(netdev, &igb_ethtool_ops); + set_ethtool_ops_ext(netdev, &igb_ethtool_ops_ext); +} +#else +void igb_set_ethtool_ops(struct net_device *netdev) +{ + /* have to "undeclare" const on this struct to remove warnings */ + SET_ETHTOOL_OPS(netdev, (struct ethtool_ops *)&igb_ethtool_ops); +} +#endif /* HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT */ +#endif /* SIOCETHTOOL */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_hwmon.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_hwmon.c new file mode 100644 index 00000000..07a1ae07 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_hwmon.c @@ -0,0 +1,260 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#include "igb.h" +#include "e1000_82575.h" +#include "e1000_hw.h" +#ifdef IGB_HWMON +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef HAVE_I2C_SUPPORT +static struct i2c_board_info i350_sensor_info = { + I2C_BOARD_INFO("i350bb", (0Xf8 >> 1)), +}; +#endif /* HAVE_I2C_SUPPORT */ + +/* hwmon callback functions */ +static ssize_t igb_hwmon_show_location(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr, + dev_attr); + return sprintf(buf, "loc%u\n", + igb_attr->sensor->location); +} + +static ssize_t igb_hwmon_show_temp(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr, + dev_attr); + unsigned int value; + + /* reset the temp field */ + igb_attr->hw->mac.ops.get_thermal_sensor_data(igb_attr->hw); + + value = igb_attr->sensor->temp; + + /* display millidegree */ + value *= 1000; + + return sprintf(buf, "%u\n", value); +} + +static ssize_t igb_hwmon_show_cautionthresh(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr, + dev_attr); + unsigned int value = igb_attr->sensor->caution_thresh; + + /* display millidegree */ + value *= 1000; + + return sprintf(buf, "%u\n", value); +} + +static ssize_t igb_hwmon_show_maxopthresh(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct hwmon_attr *igb_attr = container_of(attr, struct hwmon_attr, + dev_attr); + unsigned int value = igb_attr->sensor->max_op_thresh; + + /* display millidegree */ + value *= 1000; + + return sprintf(buf, "%u\n", value); +} + +/* igb_add_hwmon_attr - Create hwmon attr table for a hwmon sysfs file. + * @ adapter: pointer to the adapter structure + * @ offset: offset in the eeprom sensor data table + * @ type: type of sensor data to display + * + * For each file we want in hwmon's sysfs interface we need a device_attribute + * This is included in our hwmon_attr struct that contains the references to + * the data structures we need to get the data to display. + */ +static int igb_add_hwmon_attr(struct igb_adapter *adapter, + unsigned int offset, int type) { + int rc; + unsigned int n_attr; + struct hwmon_attr *igb_attr; + + n_attr = adapter->igb_hwmon_buff.n_hwmon; + igb_attr = &adapter->igb_hwmon_buff.hwmon_list[n_attr]; + + switch (type) { + case IGB_HWMON_TYPE_LOC: + igb_attr->dev_attr.show = igb_hwmon_show_location; + snprintf(igb_attr->name, sizeof(igb_attr->name), + "temp%u_label", offset); + break; + case IGB_HWMON_TYPE_TEMP: + igb_attr->dev_attr.show = igb_hwmon_show_temp; + snprintf(igb_attr->name, sizeof(igb_attr->name), + "temp%u_input", offset); + break; + case IGB_HWMON_TYPE_CAUTION: + igb_attr->dev_attr.show = igb_hwmon_show_cautionthresh; + snprintf(igb_attr->name, sizeof(igb_attr->name), + "temp%u_max", offset); + break; + case IGB_HWMON_TYPE_MAX: + igb_attr->dev_attr.show = igb_hwmon_show_maxopthresh; + snprintf(igb_attr->name, sizeof(igb_attr->name), + "temp%u_crit", offset); + break; + default: + rc = -EPERM; + return rc; + } + + /* These always the same regardless of type */ + igb_attr->sensor = + &adapter->hw.mac.thermal_sensor_data.sensor[offset]; + igb_attr->hw = &adapter->hw; + igb_attr->dev_attr.store = NULL; + igb_attr->dev_attr.attr.mode = S_IRUGO; + igb_attr->dev_attr.attr.name = igb_attr->name; + sysfs_attr_init(&igb_attr->dev_attr.attr); + rc = device_create_file(&adapter->pdev->dev, + &igb_attr->dev_attr); + if (rc == 0) + ++adapter->igb_hwmon_buff.n_hwmon; + + return rc; +} + +static void igb_sysfs_del_adapter(struct igb_adapter *adapter) +{ + int i; + + if (adapter == NULL) + return; + + for (i = 0; i < adapter->igb_hwmon_buff.n_hwmon; i++) { + device_remove_file(&adapter->pdev->dev, + &adapter->igb_hwmon_buff.hwmon_list[i].dev_attr); + } + + kfree(adapter->igb_hwmon_buff.hwmon_list); + + if (adapter->igb_hwmon_buff.device) + hwmon_device_unregister(adapter->igb_hwmon_buff.device); +} + +/* called from igb_main.c */ +void igb_sysfs_exit(struct igb_adapter *adapter) +{ + igb_sysfs_del_adapter(adapter); +} + +/* called from igb_main.c */ +int igb_sysfs_init(struct igb_adapter *adapter) +{ + struct hwmon_buff *igb_hwmon = &adapter->igb_hwmon_buff; + unsigned int i; + int n_attrs; + int rc = 0; +#ifdef HAVE_I2C_SUPPORT + struct i2c_client *client = NULL; +#endif /* HAVE_I2C_SUPPORT */ + + /* If this method isn't defined we don't support thermals */ + if (adapter->hw.mac.ops.init_thermal_sensor_thresh == NULL) + goto exit; + + /* Don't create thermal hwmon interface if no sensors present */ + rc = (adapter->hw.mac.ops.init_thermal_sensor_thresh(&adapter->hw)); + if (rc) + goto exit; +#ifdef HAVE_I2C_SUPPORT + /* init i2c_client */ + client = i2c_new_device(&adapter->i2c_adap, &i350_sensor_info); + if (client == NULL) { + dev_info(&adapter->pdev->dev, + "Failed to create new i2c device..\n"); + goto exit; + } + adapter->i2c_client = client; +#endif /* HAVE_I2C_SUPPORT */ + + /* Allocation space for max attributes + * max num sensors * values (loc, temp, max, caution) + */ + n_attrs = E1000_MAX_SENSORS * 4; + igb_hwmon->hwmon_list = kcalloc(n_attrs, sizeof(struct hwmon_attr), + GFP_KERNEL); + if (!igb_hwmon->hwmon_list) { + rc = -ENOMEM; + goto err; + } + + igb_hwmon->device = hwmon_device_register(&adapter->pdev->dev); + if (IS_ERR(igb_hwmon->device)) { + rc = PTR_ERR(igb_hwmon->device); + goto err; + } + + for (i = 0; i < E1000_MAX_SENSORS; i++) { + + /* Only create hwmon sysfs entries for sensors that have + * meaningful data. + */ + if (adapter->hw.mac.thermal_sensor_data.sensor[i].location == 0) + continue; + + /* Bail if any hwmon attr struct fails to initialize */ + rc = igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_CAUTION); + rc |= igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_LOC); + rc |= igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_TEMP); + rc |= igb_add_hwmon_attr(adapter, i, IGB_HWMON_TYPE_MAX); + if (rc) + goto err; + } + + goto exit; + +err: + igb_sysfs_del_adapter(adapter); +exit: + return rc; +} +#endif /* IGB_HWMON */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c new file mode 100644 index 00000000..96acec58 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_main.c @@ -0,0 +1,10291 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#include +#include +#include +#include +#include +#include +#include +#ifdef NETIF_F_TSO +#include +#ifdef NETIF_F_TSO6 +#include +#include +#endif +#endif +#ifdef SIOCGMIIPHY +#include +#endif +#ifdef SIOCETHTOOL +#include +#endif +#include +#ifdef CONFIG_PM_RUNTIME +#include +#endif /* CONFIG_PM_RUNTIME */ + +#include +#include "igb.h" +#include "igb_vmdq.h" + +#include + +#if defined(DEBUG) || defined (DEBUG_DUMP) || defined (DEBUG_ICR) || defined(DEBUG_ITR) +#define DRV_DEBUG "_debug" +#else +#define DRV_DEBUG +#endif +#define DRV_HW_PERF +#define VERSION_SUFFIX + +#define MAJ 5 +#define MIN 0 +#define BUILD 6 +#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." __stringify(BUILD) VERSION_SUFFIX DRV_DEBUG DRV_HW_PERF + +char igb_driver_name[] = "igb"; +char igb_driver_version[] = DRV_VERSION; +static const char igb_driver_string[] = + "Intel(R) Gigabit Ethernet Network Driver"; +static const char igb_copyright[] = + "Copyright (c) 2007-2013 Intel Corporation."; + +static DEFINE_PCI_DEVICE_TABLE(igb_pci_tbl) = { + { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_BACKPLANE_1GBPS) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_SGMII) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_I354_BACKPLANE_2_5GBPS) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_COPPER) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_FIBER) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SERDES) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SGMII) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_COPPER_FLASHLESS) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_I210_SERDES_FLASHLESS) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_I211_COPPER) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_COPPER) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_FIBER) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SERDES) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_I350_SGMII) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_FIBER) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_QUAD_FIBER) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SERDES) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_SGMII) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_82580_COPPER_DUAL) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SGMII) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SERDES) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_BACKPLANE) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_DH89XXCC_SFP) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_NS_SERDES) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_FIBER) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_SERDES_QUAD) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER_ET2) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_82576_QUAD_COPPER) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_COPPER) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575EB_FIBER_SERDES) }, + { PCI_VDEVICE(INTEL, E1000_DEV_ID_82575GB_QUAD_COPPER) }, + /* required last entry */ + {0, } +}; + +//MODULE_DEVICE_TABLE(pci, igb_pci_tbl); +static void igb_set_sriov_capability(struct igb_adapter *adapter) __attribute__((__unused__)); +void igb_reset(struct igb_adapter *); +static int igb_setup_all_tx_resources(struct igb_adapter *); +static int igb_setup_all_rx_resources(struct igb_adapter *); +static void igb_free_all_tx_resources(struct igb_adapter *); +static void igb_free_all_rx_resources(struct igb_adapter *); +static void igb_setup_mrqc(struct igb_adapter *); +void igb_update_stats(struct igb_adapter *); +static int igb_probe(struct pci_dev *, const struct pci_device_id *); +static void __devexit igb_remove(struct pci_dev *pdev); +static int igb_sw_init(struct igb_adapter *); +static int igb_open(struct net_device *); +static int igb_close(struct net_device *); +static void igb_configure(struct igb_adapter *); +static void igb_configure_tx(struct igb_adapter *); +static void igb_configure_rx(struct igb_adapter *); +static void igb_clean_all_tx_rings(struct igb_adapter *); +static void igb_clean_all_rx_rings(struct igb_adapter *); +static void igb_clean_tx_ring(struct igb_ring *); +static void igb_set_rx_mode(struct net_device *); +static void igb_update_phy_info(unsigned long); +static void igb_watchdog(unsigned long); +static void igb_watchdog_task(struct work_struct *); +static void igb_dma_err_task(struct work_struct *); +static void igb_dma_err_timer(unsigned long data); +static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *); +static struct net_device_stats *igb_get_stats(struct net_device *); +static int igb_change_mtu(struct net_device *, int); +void igb_full_sync_mac_table(struct igb_adapter *adapter); +static int igb_set_mac(struct net_device *, void *); +static void igb_set_uta(struct igb_adapter *adapter); +static irqreturn_t igb_intr(int irq, void *); +static irqreturn_t igb_intr_msi(int irq, void *); +static irqreturn_t igb_msix_other(int irq, void *); +static irqreturn_t igb_msix_ring(int irq, void *); +#ifdef IGB_DCA +static void igb_update_dca(struct igb_q_vector *); +static void igb_setup_dca(struct igb_adapter *); +#endif /* IGB_DCA */ +static int igb_poll(struct napi_struct *, int); +static bool igb_clean_tx_irq(struct igb_q_vector *); +static bool igb_clean_rx_irq(struct igb_q_vector *, int); +static int igb_ioctl(struct net_device *, struct ifreq *, int cmd); +static void igb_tx_timeout(struct net_device *); +static void igb_reset_task(struct work_struct *); +#ifdef HAVE_VLAN_RX_REGISTER +static void igb_vlan_mode(struct net_device *, struct vlan_group *); +#endif +#ifdef HAVE_VLAN_PROTOCOL +static int igb_vlan_rx_add_vid(struct net_device *, + __be16 proto, u16); +static int igb_vlan_rx_kill_vid(struct net_device *, + __be16 proto, u16); +#elif defined HAVE_INT_NDO_VLAN_RX_ADD_VID +#ifdef NETIF_F_HW_VLAN_CTAG_RX +static int igb_vlan_rx_add_vid(struct net_device *, + __always_unused __be16 proto, u16); +static int igb_vlan_rx_kill_vid(struct net_device *, + __always_unused __be16 proto, u16); +#else +static int igb_vlan_rx_add_vid(struct net_device *, u16); +static int igb_vlan_rx_kill_vid(struct net_device *, u16); +#endif +#else +static void igb_vlan_rx_add_vid(struct net_device *, u16); +static void igb_vlan_rx_kill_vid(struct net_device *, u16); +#endif +static void igb_restore_vlan(struct igb_adapter *); +void igb_rar_set(struct igb_adapter *adapter, u32 index); +static void igb_ping_all_vfs(struct igb_adapter *); +static void igb_msg_task(struct igb_adapter *); +static void igb_vmm_control(struct igb_adapter *); +static int igb_set_vf_mac(struct igb_adapter *, int, unsigned char *); +static void igb_restore_vf_multicasts(struct igb_adapter *adapter); +static void igb_process_mdd_event(struct igb_adapter *); +#ifdef IFLA_VF_MAX +static int igb_ndo_set_vf_mac( struct net_device *netdev, int vf, u8 *mac); +static int igb_ndo_set_vf_vlan(struct net_device *netdev, + int vf, u16 vlan, u8 qos); +#ifdef HAVE_VF_SPOOFCHK_CONFIGURE +static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, + bool setting); +#endif +#ifdef HAVE_VF_MIN_MAX_TXRATE +static int igb_ndo_set_vf_bw(struct net_device *, int, int, int); +#else /* HAVE_VF_MIN_MAX_TXRATE */ +static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate); +#endif /* HAVE_VF_MIN_MAX_TXRATE */ +static int igb_ndo_get_vf_config(struct net_device *netdev, int vf, + struct ifla_vf_info *ivi); +static void igb_check_vf_rate_limit(struct igb_adapter *); +#endif +static int igb_vf_configure(struct igb_adapter *adapter, int vf); +#ifdef CONFIG_PM +#ifdef HAVE_SYSTEM_SLEEP_PM_OPS +static int igb_suspend(struct device *dev); +static int igb_resume(struct device *dev); +#ifdef CONFIG_PM_RUNTIME +static int igb_runtime_suspend(struct device *dev); +static int igb_runtime_resume(struct device *dev); +static int igb_runtime_idle(struct device *dev); +#endif /* CONFIG_PM_RUNTIME */ +static const struct dev_pm_ops igb_pm_ops = { +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,34) + .suspend = igb_suspend, + .resume = igb_resume, + .freeze = igb_suspend, + .thaw = igb_resume, + .poweroff = igb_suspend, + .restore = igb_resume, +#ifdef CONFIG_PM_RUNTIME + .runtime_suspend = igb_runtime_suspend, + .runtime_resume = igb_runtime_resume, + .runtime_idle = igb_runtime_idle, +#endif +#else /* Linux >= 2.6.34 */ + SET_SYSTEM_SLEEP_PM_OPS(igb_suspend, igb_resume) +#ifdef CONFIG_PM_RUNTIME + SET_RUNTIME_PM_OPS(igb_runtime_suspend, igb_runtime_resume, + igb_runtime_idle) +#endif /* CONFIG_PM_RUNTIME */ +#endif /* Linux version */ +}; +#else +static int igb_suspend(struct pci_dev *pdev, pm_message_t state); +static int igb_resume(struct pci_dev *pdev); +#endif /* HAVE_SYSTEM_SLEEP_PM_OPS */ +#endif /* CONFIG_PM */ +#ifndef USE_REBOOT_NOTIFIER +static void igb_shutdown(struct pci_dev *); +#else +static int igb_notify_reboot(struct notifier_block *, unsigned long, void *); +static struct notifier_block igb_notifier_reboot = { + .notifier_call = igb_notify_reboot, + .next = NULL, + .priority = 0 +}; +#endif +#ifdef IGB_DCA +static int igb_notify_dca(struct notifier_block *, unsigned long, void *); +static struct notifier_block dca_notifier = { + .notifier_call = igb_notify_dca, + .next = NULL, + .priority = 0 +}; +#endif +#ifdef CONFIG_NET_POLL_CONTROLLER +/* for netdump / net console */ +static void igb_netpoll(struct net_device *); +#endif + +#ifdef HAVE_PCI_ERS +static pci_ers_result_t igb_io_error_detected(struct pci_dev *, + pci_channel_state_t); +static pci_ers_result_t igb_io_slot_reset(struct pci_dev *); +static void igb_io_resume(struct pci_dev *); + +static struct pci_error_handlers igb_err_handler = { + .error_detected = igb_io_error_detected, + .slot_reset = igb_io_slot_reset, + .resume = igb_io_resume, +}; +#endif + +static void igb_init_fw(struct igb_adapter *adapter); +static void igb_init_dmac(struct igb_adapter *adapter, u32 pba); + +static struct pci_driver igb_driver = { + .name = igb_driver_name, + .id_table = igb_pci_tbl, + .probe = igb_probe, + .remove = __devexit_p(igb_remove), +#ifdef CONFIG_PM +#ifdef HAVE_SYSTEM_SLEEP_PM_OPS + .driver.pm = &igb_pm_ops, +#else + .suspend = igb_suspend, + .resume = igb_resume, +#endif /* HAVE_SYSTEM_SLEEP_PM_OPS */ +#endif /* CONFIG_PM */ +#ifndef USE_REBOOT_NOTIFIER + .shutdown = igb_shutdown, +#endif +#ifdef HAVE_PCI_ERS + .err_handler = &igb_err_handler +#endif +}; + +//MODULE_AUTHOR("Intel Corporation, "); +//MODULE_DESCRIPTION("Intel(R) Gigabit Ethernet Network Driver"); +//MODULE_LICENSE("GPL"); +//MODULE_VERSION(DRV_VERSION); + +static void igb_vfta_set(struct igb_adapter *adapter, u32 vid, bool add) +{ + struct e1000_hw *hw = &adapter->hw; + struct e1000_host_mng_dhcp_cookie *mng_cookie = &hw->mng_cookie; + u32 index = (vid >> E1000_VFTA_ENTRY_SHIFT) & E1000_VFTA_ENTRY_MASK; + u32 mask = 1 << (vid & E1000_VFTA_ENTRY_BIT_SHIFT_MASK); + u32 vfta; + + /* + * if this is the management vlan the only option is to add it in so + * that the management pass through will continue to work + */ + if ((mng_cookie->status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) && + (vid == mng_cookie->vlan_id)) + add = TRUE; + + vfta = adapter->shadow_vfta[index]; + + if (add) + vfta |= mask; + else + vfta &= ~mask; + + e1000_write_vfta(hw, index, vfta); + adapter->shadow_vfta[index] = vfta; +} + +static int debug = NETIF_MSG_DRV | NETIF_MSG_PROBE; +//module_param(debug, int, 0); +//MODULE_PARM_DESC(debug, "Debug level (0=none, ..., 16=all)"); + +/** + * igb_init_module - Driver Registration Routine + * + * igb_init_module is the first routine called when the driver is + * loaded. All it does is register with the PCI subsystem. + **/ +static int __init igb_init_module(void) +{ + int ret; + + printk(KERN_INFO "%s - version %s\n", + igb_driver_string, igb_driver_version); + + printk(KERN_INFO "%s\n", igb_copyright); +#ifdef IGB_HWMON +/* only use IGB_PROCFS if IGB_HWMON is not defined */ +#else +#ifdef IGB_PROCFS + if (igb_procfs_topdir_init()) + printk(KERN_INFO "Procfs failed to initialize topdir\n"); +#endif /* IGB_PROCFS */ +#endif /* IGB_HWMON */ + +#ifdef IGB_DCA + dca_register_notify(&dca_notifier); +#endif + ret = pci_register_driver(&igb_driver); +#ifdef USE_REBOOT_NOTIFIER + if (ret >= 0) { + register_reboot_notifier(&igb_notifier_reboot); + } +#endif + return ret; +} + +#undef module_init +#define module_init(x) static int x(void) __attribute__((__unused__)); +module_init(igb_init_module); + +/** + * igb_exit_module - Driver Exit Cleanup Routine + * + * igb_exit_module is called just before the driver is removed + * from memory. + **/ +static void __exit igb_exit_module(void) +{ +#ifdef IGB_DCA + dca_unregister_notify(&dca_notifier); +#endif +#ifdef USE_REBOOT_NOTIFIER + unregister_reboot_notifier(&igb_notifier_reboot); +#endif + pci_unregister_driver(&igb_driver); + +#ifdef IGB_HWMON +/* only compile IGB_PROCFS if IGB_HWMON is not defined */ +#else +#ifdef IGB_PROCFS + igb_procfs_topdir_exit(); +#endif /* IGB_PROCFS */ +#endif /* IGB_HWMON */ +} + +#undef module_exit +#define module_exit(x) static void x(void) __attribute__((__unused__)); +module_exit(igb_exit_module); + +#define Q_IDX_82576(i) (((i & 0x1) << 3) + (i >> 1)) +/** + * igb_cache_ring_register - Descriptor ring to register mapping + * @adapter: board private structure to initialize + * + * Once we know the feature-set enabled for the device, we'll cache + * the register offset the descriptor ring is assigned to. + **/ +static void igb_cache_ring_register(struct igb_adapter *adapter) +{ + int i = 0, j = 0; + u32 rbase_offset = adapter->vfs_allocated_count; + + switch (adapter->hw.mac.type) { + case e1000_82576: + /* The queues are allocated for virtualization such that VF 0 + * is allocated queues 0 and 8, VF 1 queues 1 and 9, etc. + * In order to avoid collision we start at the first free queue + * and continue consuming queues in the same sequence + */ + if ((adapter->rss_queues > 1) && adapter->vmdq_pools) { + for (; i < adapter->rss_queues; i++) + adapter->rx_ring[i]->reg_idx = rbase_offset + + Q_IDX_82576(i); + } + case e1000_82575: + case e1000_82580: + case e1000_i350: + case e1000_i354: + case e1000_i210: + case e1000_i211: + default: + for (; i < adapter->num_rx_queues; i++) + adapter->rx_ring[i]->reg_idx = rbase_offset + i; + for (; j < adapter->num_tx_queues; j++) + adapter->tx_ring[j]->reg_idx = rbase_offset + j; + break; + } +} + +static void igb_configure_lli(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u16 port; + + /* LLI should only be enabled for MSI-X or MSI interrupts */ + if (!adapter->msix_entries && !(adapter->flags & IGB_FLAG_HAS_MSI)) + return; + + if (adapter->lli_port) { + /* use filter 0 for port */ + port = htons((u16)adapter->lli_port); + E1000_WRITE_REG(hw, E1000_IMIR(0), + (port | E1000_IMIR_PORT_IM_EN)); + E1000_WRITE_REG(hw, E1000_IMIREXT(0), + (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP)); + } + + if (adapter->flags & IGB_FLAG_LLI_PUSH) { + /* use filter 1 for push flag */ + E1000_WRITE_REG(hw, E1000_IMIR(1), + (E1000_IMIR_PORT_BP | E1000_IMIR_PORT_IM_EN)); + E1000_WRITE_REG(hw, E1000_IMIREXT(1), + (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_PSH)); + } + + if (adapter->lli_size) { + /* use filter 2 for size */ + E1000_WRITE_REG(hw, E1000_IMIR(2), + (E1000_IMIR_PORT_BP | E1000_IMIR_PORT_IM_EN)); + E1000_WRITE_REG(hw, E1000_IMIREXT(2), + (adapter->lli_size | E1000_IMIREXT_CTRL_BP)); + } + +} + +/** + * igb_write_ivar - configure ivar for given MSI-X vector + * @hw: pointer to the HW structure + * @msix_vector: vector number we are allocating to a given ring + * @index: row index of IVAR register to write within IVAR table + * @offset: column offset of in IVAR, should be multiple of 8 + * + * This function is intended to handle the writing of the IVAR register + * for adapters 82576 and newer. The IVAR table consists of 2 columns, + * each containing an cause allocation for an Rx and Tx ring, and a + * variable number of rows depending on the number of queues supported. + **/ +static void igb_write_ivar(struct e1000_hw *hw, int msix_vector, + int index, int offset) +{ + u32 ivar = E1000_READ_REG_ARRAY(hw, E1000_IVAR0, index); + + /* clear any bits that are currently set */ + ivar &= ~((u32)0xFF << offset); + + /* write vector and valid bit */ + ivar |= (msix_vector | E1000_IVAR_VALID) << offset; + + E1000_WRITE_REG_ARRAY(hw, E1000_IVAR0, index, ivar); +} + +#define IGB_N0_QUEUE -1 +static void igb_assign_vector(struct igb_q_vector *q_vector, int msix_vector) +{ + struct igb_adapter *adapter = q_vector->adapter; + struct e1000_hw *hw = &adapter->hw; + int rx_queue = IGB_N0_QUEUE; + int tx_queue = IGB_N0_QUEUE; + u32 msixbm = 0; + + if (q_vector->rx.ring) + rx_queue = q_vector->rx.ring->reg_idx; + if (q_vector->tx.ring) + tx_queue = q_vector->tx.ring->reg_idx; + + switch (hw->mac.type) { + case e1000_82575: + /* The 82575 assigns vectors using a bitmask, which matches the + bitmask for the EICR/EIMS/EIMC registers. To assign one + or more queues to a vector, we write the appropriate bits + into the MSIXBM register for that vector. */ + if (rx_queue > IGB_N0_QUEUE) + msixbm = E1000_EICR_RX_QUEUE0 << rx_queue; + if (tx_queue > IGB_N0_QUEUE) + msixbm |= E1000_EICR_TX_QUEUE0 << tx_queue; + if (!adapter->msix_entries && msix_vector == 0) + msixbm |= E1000_EIMS_OTHER; + E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0), msix_vector, msixbm); + q_vector->eims_value = msixbm; + break; + case e1000_82576: + /* + * 82576 uses a table that essentially consists of 2 columns + * with 8 rows. The ordering is column-major so we use the + * lower 3 bits as the row index, and the 4th bit as the + * column offset. + */ + if (rx_queue > IGB_N0_QUEUE) + igb_write_ivar(hw, msix_vector, + rx_queue & 0x7, + (rx_queue & 0x8) << 1); + if (tx_queue > IGB_N0_QUEUE) + igb_write_ivar(hw, msix_vector, + tx_queue & 0x7, + ((tx_queue & 0x8) << 1) + 8); + q_vector->eims_value = 1 << msix_vector; + break; + case e1000_82580: + case e1000_i350: + case e1000_i354: + case e1000_i210: + case e1000_i211: + /* + * On 82580 and newer adapters the scheme is similar to 82576 + * however instead of ordering column-major we have things + * ordered row-major. So we traverse the table by using + * bit 0 as the column offset, and the remaining bits as the + * row index. + */ + if (rx_queue > IGB_N0_QUEUE) + igb_write_ivar(hw, msix_vector, + rx_queue >> 1, + (rx_queue & 0x1) << 4); + if (tx_queue > IGB_N0_QUEUE) + igb_write_ivar(hw, msix_vector, + tx_queue >> 1, + ((tx_queue & 0x1) << 4) + 8); + q_vector->eims_value = 1 << msix_vector; + break; + default: + BUG(); + break; + } + + /* add q_vector eims value to global eims_enable_mask */ + adapter->eims_enable_mask |= q_vector->eims_value; + + /* configure q_vector to set itr on first interrupt */ + q_vector->set_itr = 1; +} + +/** + * igb_configure_msix - Configure MSI-X hardware + * + * igb_configure_msix sets up the hardware to properly + * generate MSI-X interrupts. + **/ +static void igb_configure_msix(struct igb_adapter *adapter) +{ + u32 tmp; + int i, vector = 0; + struct e1000_hw *hw = &adapter->hw; + + adapter->eims_enable_mask = 0; + + /* set vector for other causes, i.e. link changes */ + switch (hw->mac.type) { + case e1000_82575: + tmp = E1000_READ_REG(hw, E1000_CTRL_EXT); + /* enable MSI-X PBA support*/ + tmp |= E1000_CTRL_EXT_PBA_CLR; + + /* Auto-Mask interrupts upon ICR read. */ + tmp |= E1000_CTRL_EXT_EIAME; + tmp |= E1000_CTRL_EXT_IRCA; + + E1000_WRITE_REG(hw, E1000_CTRL_EXT, tmp); + + /* enable msix_other interrupt */ + E1000_WRITE_REG_ARRAY(hw, E1000_MSIXBM(0), vector++, + E1000_EIMS_OTHER); + adapter->eims_other = E1000_EIMS_OTHER; + + break; + + case e1000_82576: + case e1000_82580: + case e1000_i350: + case e1000_i354: + case e1000_i210: + case e1000_i211: + /* Turn on MSI-X capability first, or our settings + * won't stick. And it will take days to debug. */ + E1000_WRITE_REG(hw, E1000_GPIE, E1000_GPIE_MSIX_MODE | + E1000_GPIE_PBA | E1000_GPIE_EIAME | + E1000_GPIE_NSICR); + + /* enable msix_other interrupt */ + adapter->eims_other = 1 << vector; + tmp = (vector++ | E1000_IVAR_VALID) << 8; + + E1000_WRITE_REG(hw, E1000_IVAR_MISC, tmp); + break; + default: + /* do nothing, since nothing else supports MSI-X */ + break; + } /* switch (hw->mac.type) */ + + adapter->eims_enable_mask |= adapter->eims_other; + + for (i = 0; i < adapter->num_q_vectors; i++) + igb_assign_vector(adapter->q_vector[i], vector++); + + E1000_WRITE_FLUSH(hw); +} + +/** + * igb_request_msix - Initialize MSI-X interrupts + * + * igb_request_msix allocates MSI-X vectors and requests interrupts from the + * kernel. + **/ +static int igb_request_msix(struct igb_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + struct e1000_hw *hw = &adapter->hw; + int i, err = 0, vector = 0, free_vector = 0; + + err = request_irq(adapter->msix_entries[vector].vector, + &igb_msix_other, 0, netdev->name, adapter); + if (err) + goto err_out; + + for (i = 0; i < adapter->num_q_vectors; i++) { + struct igb_q_vector *q_vector = adapter->q_vector[i]; + + vector++; + + q_vector->itr_register = hw->hw_addr + E1000_EITR(vector); + + if (q_vector->rx.ring && q_vector->tx.ring) + sprintf(q_vector->name, "%s-TxRx-%u", netdev->name, + q_vector->rx.ring->queue_index); + else if (q_vector->tx.ring) + sprintf(q_vector->name, "%s-tx-%u", netdev->name, + q_vector->tx.ring->queue_index); + else if (q_vector->rx.ring) + sprintf(q_vector->name, "%s-rx-%u", netdev->name, + q_vector->rx.ring->queue_index); + else + sprintf(q_vector->name, "%s-unused", netdev->name); + + err = request_irq(adapter->msix_entries[vector].vector, + igb_msix_ring, 0, q_vector->name, + q_vector); + if (err) + goto err_free; + } + + igb_configure_msix(adapter); + return 0; + +err_free: + /* free already assigned IRQs */ + free_irq(adapter->msix_entries[free_vector++].vector, adapter); + + vector--; + for (i = 0; i < vector; i++) { + free_irq(adapter->msix_entries[free_vector++].vector, + adapter->q_vector[i]); + } +err_out: + return err; +} + +static void igb_reset_interrupt_capability(struct igb_adapter *adapter) +{ + if (adapter->msix_entries) { + pci_disable_msix(adapter->pdev); + kfree(adapter->msix_entries); + adapter->msix_entries = NULL; + } else if (adapter->flags & IGB_FLAG_HAS_MSI) { + pci_disable_msi(adapter->pdev); + } +} + +/** + * igb_free_q_vector - Free memory allocated for specific interrupt vector + * @adapter: board private structure to initialize + * @v_idx: Index of vector to be freed + * + * This function frees the memory allocated to the q_vector. In addition if + * NAPI is enabled it will delete any references to the NAPI struct prior + * to freeing the q_vector. + **/ +static void igb_free_q_vector(struct igb_adapter *adapter, int v_idx) +{ + struct igb_q_vector *q_vector = adapter->q_vector[v_idx]; + + if (q_vector->tx.ring) + adapter->tx_ring[q_vector->tx.ring->queue_index] = NULL; + + if (q_vector->rx.ring) + adapter->tx_ring[q_vector->rx.ring->queue_index] = NULL; + + adapter->q_vector[v_idx] = NULL; + netif_napi_del(&q_vector->napi); +#ifndef IGB_NO_LRO + __skb_queue_purge(&q_vector->lrolist.active); +#endif + kfree(q_vector); +} + +/** + * igb_free_q_vectors - Free memory allocated for interrupt vectors + * @adapter: board private structure to initialize + * + * This function frees the memory allocated to the q_vectors. In addition if + * NAPI is enabled it will delete any references to the NAPI struct prior + * to freeing the q_vector. + **/ +static void igb_free_q_vectors(struct igb_adapter *adapter) +{ + int v_idx = adapter->num_q_vectors; + + adapter->num_tx_queues = 0; + adapter->num_rx_queues = 0; + adapter->num_q_vectors = 0; + + while (v_idx--) + igb_free_q_vector(adapter, v_idx); +} + +/** + * igb_clear_interrupt_scheme - reset the device to a state of no interrupts + * + * This function resets the device so that it has 0 rx queues, tx queues, and + * MSI-X interrupts allocated. + */ +static void igb_clear_interrupt_scheme(struct igb_adapter *adapter) +{ + igb_free_q_vectors(adapter); + igb_reset_interrupt_capability(adapter); +} + +/** + * igb_process_mdd_event + * @adapter - board private structure + * + * Identify a malicious VF, disable the VF TX/RX queues and log a message. + */ +static void igb_process_mdd_event(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 lvmmc, vfte, vfre, mdfb; + u8 vf_queue; + + lvmmc = E1000_READ_REG(hw, E1000_LVMMC); + vf_queue = lvmmc >> 29; + + /* VF index cannot be bigger or equal to VFs allocated */ + if (vf_queue >= adapter->vfs_allocated_count) + return; + + netdev_info(adapter->netdev, + "VF %d misbehaved. VF queues are disabled. " + "VM misbehavior code is 0x%x\n", vf_queue, lvmmc); + + /* Disable VFTE and VFRE related bits */ + vfte = E1000_READ_REG(hw, E1000_VFTE); + vfte &= ~(1 << vf_queue); + E1000_WRITE_REG(hw, E1000_VFTE, vfte); + + vfre = E1000_READ_REG(hw, E1000_VFRE); + vfre &= ~(1 << vf_queue); + E1000_WRITE_REG(hw, E1000_VFRE, vfre); + + /* Disable MDFB related bit. Clear on write */ + mdfb = E1000_READ_REG(hw, E1000_MDFB); + mdfb |= (1 << vf_queue); + E1000_WRITE_REG(hw, E1000_MDFB, mdfb); + + /* Reset the specific VF */ + E1000_WRITE_REG(hw, E1000_VTCTRL(vf_queue), E1000_VTCTRL_RST); +} + +/** + * igb_disable_mdd + * @adapter - board private structure + * + * Disable MDD behavior in the HW + **/ +static void igb_disable_mdd(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 reg; + + if ((hw->mac.type != e1000_i350) || + (hw->mac.type != e1000_i354)) + return; + + reg = E1000_READ_REG(hw, E1000_DTXCTL); + reg &= (~E1000_DTXCTL_MDP_EN); + E1000_WRITE_REG(hw, E1000_DTXCTL, reg); +} + +/** + * igb_enable_mdd + * @adapter - board private structure + * + * Enable the HW to detect malicious driver and sends an interrupt to + * the driver. + **/ +static void igb_enable_mdd(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 reg; + + /* Only available on i350 device */ + if (hw->mac.type != e1000_i350) + return; + + reg = E1000_READ_REG(hw, E1000_DTXCTL); + reg |= E1000_DTXCTL_MDP_EN; + E1000_WRITE_REG(hw, E1000_DTXCTL, reg); +} + +/** + * igb_reset_sriov_capability - disable SR-IOV if enabled + * + * Attempt to disable single root IO virtualization capabilites present in the + * kernel. + **/ +static void igb_reset_sriov_capability(struct igb_adapter *adapter) +{ + struct pci_dev *pdev = adapter->pdev; + struct e1000_hw *hw = &adapter->hw; + + /* reclaim resources allocated to VFs */ + if (adapter->vf_data) { + if (!pci_vfs_assigned(pdev)) { + /* + * disable iov and allow time for transactions to + * clear + */ + pci_disable_sriov(pdev); + msleep(500); + + dev_info(pci_dev_to_dev(pdev), "IOV Disabled\n"); + } else { + dev_info(pci_dev_to_dev(pdev), "IOV Not Disabled\n " + "VF(s) are assigned to guests!\n"); + } + /* Disable Malicious Driver Detection */ + igb_disable_mdd(adapter); + + /* free vf data storage */ + kfree(adapter->vf_data); + adapter->vf_data = NULL; + + /* switch rings back to PF ownership */ + E1000_WRITE_REG(hw, E1000_IOVCTL, + E1000_IOVCTL_REUSE_VFQ); + E1000_WRITE_FLUSH(hw); + msleep(100); + } + + adapter->vfs_allocated_count = 0; +} + +/** + * igb_set_sriov_capability - setup SR-IOV if supported + * + * Attempt to enable single root IO virtualization capabilites present in the + * kernel. + **/ +static void igb_set_sriov_capability(struct igb_adapter *adapter) +{ + struct pci_dev *pdev = adapter->pdev; + int old_vfs = 0; + int i; + + old_vfs = pci_num_vf(pdev); + if (old_vfs) { + dev_info(pci_dev_to_dev(pdev), + "%d pre-allocated VFs found - override " + "max_vfs setting of %d\n", old_vfs, + adapter->vfs_allocated_count); + adapter->vfs_allocated_count = old_vfs; + } + /* no VFs requested, do nothing */ + if (!adapter->vfs_allocated_count) + return; + + /* allocate vf data storage */ + adapter->vf_data = kcalloc(adapter->vfs_allocated_count, + sizeof(struct vf_data_storage), + GFP_KERNEL); + + if (adapter->vf_data) { + if (!old_vfs) { + if (pci_enable_sriov(pdev, + adapter->vfs_allocated_count)) + goto err_out; + } + for (i = 0; i < adapter->vfs_allocated_count; i++) + igb_vf_configure(adapter, i); + + switch (adapter->hw.mac.type) { + case e1000_82576: + case e1000_i350: + /* Enable VM to VM loopback by default */ + adapter->flags |= IGB_FLAG_LOOPBACK_ENABLE; + break; + default: + /* Currently no other hardware supports loopback */ + break; + } + + /* DMA Coalescing is not supported in IOV mode. */ + if (adapter->hw.mac.type >= e1000_i350) + adapter->dmac = IGB_DMAC_DISABLE; + if (adapter->hw.mac.type < e1000_i350) + adapter->flags |= IGB_FLAG_DETECT_BAD_DMA; + return; + + } + +err_out: + kfree(adapter->vf_data); + adapter->vf_data = NULL; + adapter->vfs_allocated_count = 0; + dev_warn(pci_dev_to_dev(pdev), + "Failed to initialize SR-IOV virtualization\n"); +} + +/** + * igb_set_interrupt_capability - set MSI or MSI-X if supported + * + * Attempt to configure interrupts using the best available + * capabilities of the hardware and kernel. + **/ +static void igb_set_interrupt_capability(struct igb_adapter *adapter, bool msix) +{ + struct pci_dev *pdev = adapter->pdev; + int err; + int numvecs, i; + + if (!msix) + adapter->int_mode = IGB_INT_MODE_MSI; + + /* Number of supported queues. */ + adapter->num_rx_queues = adapter->rss_queues; + + if (adapter->vmdq_pools > 1) + adapter->num_rx_queues += adapter->vmdq_pools - 1; + +#ifdef HAVE_TX_MQ + if (adapter->vmdq_pools) + adapter->num_tx_queues = adapter->vmdq_pools; + else + adapter->num_tx_queues = adapter->num_rx_queues; +#else + adapter->num_tx_queues = max_t(u32, 1, adapter->vmdq_pools); +#endif + + switch (adapter->int_mode) { + case IGB_INT_MODE_MSIX: + /* start with one vector for every rx queue */ + numvecs = adapter->num_rx_queues; + + /* if tx handler is separate add 1 for every tx queue */ + if (!(adapter->flags & IGB_FLAG_QUEUE_PAIRS)) + numvecs += adapter->num_tx_queues; + + /* store the number of vectors reserved for queues */ + adapter->num_q_vectors = numvecs; + + /* add 1 vector for link status interrupts */ + numvecs++; + adapter->msix_entries = kcalloc(numvecs, + sizeof(struct msix_entry), + GFP_KERNEL); + if (adapter->msix_entries) { + for (i = 0; i < numvecs; i++) + adapter->msix_entries[i].entry = i; + + err = pci_enable_msix(pdev, + adapter->msix_entries, numvecs); + if (err == 0) + break; + } + /* MSI-X failed, so fall through and try MSI */ + dev_warn(pci_dev_to_dev(pdev), "Failed to initialize MSI-X interrupts. " + "Falling back to MSI interrupts.\n"); + igb_reset_interrupt_capability(adapter); + case IGB_INT_MODE_MSI: + if (!pci_enable_msi(pdev)) + adapter->flags |= IGB_FLAG_HAS_MSI; + else + dev_warn(pci_dev_to_dev(pdev), "Failed to initialize MSI " + "interrupts. Falling back to legacy " + "interrupts.\n"); + /* Fall through */ + case IGB_INT_MODE_LEGACY: + /* disable advanced features and set number of queues to 1 */ + igb_reset_sriov_capability(adapter); + adapter->vmdq_pools = 0; + adapter->rss_queues = 1; + adapter->flags |= IGB_FLAG_QUEUE_PAIRS; + adapter->num_rx_queues = 1; + adapter->num_tx_queues = 1; + adapter->num_q_vectors = 1; + /* Don't do anything; this is system default */ + break; + } +} + +static void igb_add_ring(struct igb_ring *ring, + struct igb_ring_container *head) +{ + head->ring = ring; + head->count++; +} + +/** + * igb_alloc_q_vector - Allocate memory for a single interrupt vector + * @adapter: board private structure to initialize + * @v_count: q_vectors allocated on adapter, used for ring interleaving + * @v_idx: index of vector in adapter struct + * @txr_count: total number of Tx rings to allocate + * @txr_idx: index of first Tx ring to allocate + * @rxr_count: total number of Rx rings to allocate + * @rxr_idx: index of first Rx ring to allocate + * + * We allocate one q_vector. If allocation fails we return -ENOMEM. + **/ +static int igb_alloc_q_vector(struct igb_adapter *adapter, + unsigned int v_count, unsigned int v_idx, + unsigned int txr_count, unsigned int txr_idx, + unsigned int rxr_count, unsigned int rxr_idx) +{ + struct igb_q_vector *q_vector; + struct igb_ring *ring; + int ring_count, size; + + /* igb only supports 1 Tx and/or 1 Rx queue per vector */ + if (txr_count > 1 || rxr_count > 1) + return -ENOMEM; + + ring_count = txr_count + rxr_count; + size = sizeof(struct igb_q_vector) + + (sizeof(struct igb_ring) * ring_count); + + /* allocate q_vector and rings */ + q_vector = kzalloc(size, GFP_KERNEL); + if (!q_vector) + return -ENOMEM; + +#ifndef IGB_NO_LRO + /* initialize LRO */ + __skb_queue_head_init(&q_vector->lrolist.active); + +#endif + /* initialize NAPI */ + netif_napi_add(adapter->netdev, &q_vector->napi, + igb_poll, 64); + + /* tie q_vector and adapter together */ + adapter->q_vector[v_idx] = q_vector; + q_vector->adapter = adapter; + + /* initialize work limits */ + q_vector->tx.work_limit = adapter->tx_work_limit; + + /* initialize ITR configuration */ + q_vector->itr_register = adapter->hw.hw_addr + E1000_EITR(0); + q_vector->itr_val = IGB_START_ITR; + + /* initialize pointer to rings */ + ring = q_vector->ring; + + /* intialize ITR */ + if (rxr_count) { + /* rx or rx/tx vector */ + if (!adapter->rx_itr_setting || adapter->rx_itr_setting > 3) + q_vector->itr_val = adapter->rx_itr_setting; + } else { + /* tx only vector */ + if (!adapter->tx_itr_setting || adapter->tx_itr_setting > 3) + q_vector->itr_val = adapter->tx_itr_setting; + } + + if (txr_count) { + /* assign generic ring traits */ + ring->dev = &adapter->pdev->dev; + ring->netdev = adapter->netdev; + + /* configure backlink on ring */ + ring->q_vector = q_vector; + + /* update q_vector Tx values */ + igb_add_ring(ring, &q_vector->tx); + + /* For 82575, context index must be unique per ring. */ + if (adapter->hw.mac.type == e1000_82575) + set_bit(IGB_RING_FLAG_TX_CTX_IDX, &ring->flags); + + /* apply Tx specific ring traits */ + ring->count = adapter->tx_ring_count; + ring->queue_index = txr_idx; + + /* assign ring to adapter */ + adapter->tx_ring[txr_idx] = ring; + + /* push pointer to next ring */ + ring++; + } + + if (rxr_count) { + /* assign generic ring traits */ + ring->dev = &adapter->pdev->dev; + ring->netdev = adapter->netdev; + + /* configure backlink on ring */ + ring->q_vector = q_vector; + + /* update q_vector Rx values */ + igb_add_ring(ring, &q_vector->rx); + +#ifndef HAVE_NDO_SET_FEATURES + /* enable rx checksum */ + set_bit(IGB_RING_FLAG_RX_CSUM, &ring->flags); + +#endif + /* set flag indicating ring supports SCTP checksum offload */ + if (adapter->hw.mac.type >= e1000_82576) + set_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags); + + if ((adapter->hw.mac.type == e1000_i350) || + (adapter->hw.mac.type == e1000_i354)) + set_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &ring->flags); + + /* apply Rx specific ring traits */ + ring->count = adapter->rx_ring_count; + ring->queue_index = rxr_idx; + + /* assign ring to adapter */ + adapter->rx_ring[rxr_idx] = ring; + } + + return 0; +} + +/** + * igb_alloc_q_vectors - Allocate memory for interrupt vectors + * @adapter: board private structure to initialize + * + * We allocate one q_vector per queue interrupt. If allocation fails we + * return -ENOMEM. + **/ +static int igb_alloc_q_vectors(struct igb_adapter *adapter) +{ + int q_vectors = adapter->num_q_vectors; + int rxr_remaining = adapter->num_rx_queues; + int txr_remaining = adapter->num_tx_queues; + int rxr_idx = 0, txr_idx = 0, v_idx = 0; + int err; + + if (q_vectors >= (rxr_remaining + txr_remaining)) { + for (; rxr_remaining; v_idx++) { + err = igb_alloc_q_vector(adapter, q_vectors, v_idx, + 0, 0, 1, rxr_idx); + + if (err) + goto err_out; + + /* update counts and index */ + rxr_remaining--; + rxr_idx++; + } + } + + for (; v_idx < q_vectors; v_idx++) { + int rqpv = DIV_ROUND_UP(rxr_remaining, q_vectors - v_idx); + int tqpv = DIV_ROUND_UP(txr_remaining, q_vectors - v_idx); + err = igb_alloc_q_vector(adapter, q_vectors, v_idx, + tqpv, txr_idx, rqpv, rxr_idx); + + if (err) + goto err_out; + + /* update counts and index */ + rxr_remaining -= rqpv; + txr_remaining -= tqpv; + rxr_idx++; + txr_idx++; + } + + return 0; + +err_out: + adapter->num_tx_queues = 0; + adapter->num_rx_queues = 0; + adapter->num_q_vectors = 0; + + while (v_idx--) + igb_free_q_vector(adapter, v_idx); + + return -ENOMEM; +} + +/** + * igb_init_interrupt_scheme - initialize interrupts, allocate queues/vectors + * + * This function initializes the interrupts and allocates all of the queues. + **/ +static int igb_init_interrupt_scheme(struct igb_adapter *adapter, bool msix) +{ + struct pci_dev *pdev = adapter->pdev; + int err; + + igb_set_interrupt_capability(adapter, msix); + + err = igb_alloc_q_vectors(adapter); + if (err) { + dev_err(pci_dev_to_dev(pdev), "Unable to allocate memory for vectors\n"); + goto err_alloc_q_vectors; + } + + igb_cache_ring_register(adapter); + + return 0; + +err_alloc_q_vectors: + igb_reset_interrupt_capability(adapter); + return err; +} + +/** + * igb_request_irq - initialize interrupts + * + * Attempts to configure interrupts using the best available + * capabilities of the hardware and kernel. + **/ +static int igb_request_irq(struct igb_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + struct pci_dev *pdev = adapter->pdev; + int err = 0; + + if (adapter->msix_entries) { + err = igb_request_msix(adapter); + if (!err) + goto request_done; + /* fall back to MSI */ + igb_free_all_tx_resources(adapter); + igb_free_all_rx_resources(adapter); + + igb_clear_interrupt_scheme(adapter); + igb_reset_sriov_capability(adapter); + err = igb_init_interrupt_scheme(adapter, false); + if (err) + goto request_done; + igb_setup_all_tx_resources(adapter); + igb_setup_all_rx_resources(adapter); + igb_configure(adapter); + } + + igb_assign_vector(adapter->q_vector[0], 0); + + if (adapter->flags & IGB_FLAG_HAS_MSI) { + err = request_irq(pdev->irq, &igb_intr_msi, 0, + netdev->name, adapter); + if (!err) + goto request_done; + + /* fall back to legacy interrupts */ + igb_reset_interrupt_capability(adapter); + adapter->flags &= ~IGB_FLAG_HAS_MSI; + } + + err = request_irq(pdev->irq, &igb_intr, IRQF_SHARED, + netdev->name, adapter); + + if (err) + dev_err(pci_dev_to_dev(pdev), "Error %d getting interrupt\n", + err); + +request_done: + return err; +} + +static void igb_free_irq(struct igb_adapter *adapter) +{ + if (adapter->msix_entries) { + int vector = 0, i; + + free_irq(adapter->msix_entries[vector++].vector, adapter); + + for (i = 0; i < adapter->num_q_vectors; i++) + free_irq(adapter->msix_entries[vector++].vector, + adapter->q_vector[i]); + } else { + free_irq(adapter->pdev->irq, adapter); + } +} + +/** + * igb_irq_disable - Mask off interrupt generation on the NIC + * @adapter: board private structure + **/ +static void igb_irq_disable(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + + /* + * we need to be careful when disabling interrupts. The VFs are also + * mapped into these registers and so clearing the bits can cause + * issues on the VF drivers so we only need to clear what we set + */ + if (adapter->msix_entries) { + u32 regval = E1000_READ_REG(hw, E1000_EIAM); + E1000_WRITE_REG(hw, E1000_EIAM, regval & ~adapter->eims_enable_mask); + E1000_WRITE_REG(hw, E1000_EIMC, adapter->eims_enable_mask); + regval = E1000_READ_REG(hw, E1000_EIAC); + E1000_WRITE_REG(hw, E1000_EIAC, regval & ~adapter->eims_enable_mask); + } + + E1000_WRITE_REG(hw, E1000_IAM, 0); + E1000_WRITE_REG(hw, E1000_IMC, ~0); + E1000_WRITE_FLUSH(hw); + + if (adapter->msix_entries) { + int vector = 0, i; + + synchronize_irq(adapter->msix_entries[vector++].vector); + + for (i = 0; i < adapter->num_q_vectors; i++) + synchronize_irq(adapter->msix_entries[vector++].vector); + } else { + synchronize_irq(adapter->pdev->irq); + } +} + +/** + * igb_irq_enable - Enable default interrupt generation settings + * @adapter: board private structure + **/ +static void igb_irq_enable(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + + if (adapter->msix_entries) { + u32 ims = E1000_IMS_LSC | E1000_IMS_DOUTSYNC | E1000_IMS_DRSTA; + u32 regval = E1000_READ_REG(hw, E1000_EIAC); + E1000_WRITE_REG(hw, E1000_EIAC, regval | adapter->eims_enable_mask); + regval = E1000_READ_REG(hw, E1000_EIAM); + E1000_WRITE_REG(hw, E1000_EIAM, regval | adapter->eims_enable_mask); + E1000_WRITE_REG(hw, E1000_EIMS, adapter->eims_enable_mask); + if (adapter->vfs_allocated_count) { + E1000_WRITE_REG(hw, E1000_MBVFIMR, 0xFF); + ims |= E1000_IMS_VMMB; + if (adapter->mdd) + if ((adapter->hw.mac.type == e1000_i350) || + (adapter->hw.mac.type == e1000_i354)) + ims |= E1000_IMS_MDDET; + } + E1000_WRITE_REG(hw, E1000_IMS, ims); + } else { + E1000_WRITE_REG(hw, E1000_IMS, IMS_ENABLE_MASK | + E1000_IMS_DRSTA); + E1000_WRITE_REG(hw, E1000_IAM, IMS_ENABLE_MASK | + E1000_IMS_DRSTA); + } +} + +static void igb_update_mng_vlan(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u16 vid = adapter->hw.mng_cookie.vlan_id; + u16 old_vid = adapter->mng_vlan_id; + + if (hw->mng_cookie.status & E1000_MNG_DHCP_COOKIE_STATUS_VLAN) { + /* add VID to filter table */ + igb_vfta_set(adapter, vid, TRUE); + adapter->mng_vlan_id = vid; + } else { + adapter->mng_vlan_id = IGB_MNG_VLAN_NONE; + } + + if ((old_vid != (u16)IGB_MNG_VLAN_NONE) && + (vid != old_vid) && +#ifdef HAVE_VLAN_RX_REGISTER + !vlan_group_get_device(adapter->vlgrp, old_vid)) { +#else + !test_bit(old_vid, adapter->active_vlans)) { +#endif + /* remove VID from filter table */ + igb_vfta_set(adapter, old_vid, FALSE); + } +} + +/** + * igb_release_hw_control - release control of the h/w to f/w + * @adapter: address of board private structure + * + * igb_release_hw_control resets CTRL_EXT:DRV_LOAD bit. + * For ASF and Pass Through versions of f/w this means that the + * driver is no longer loaded. + * + **/ +static void igb_release_hw_control(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 ctrl_ext; + + /* Let firmware take over control of h/w */ + ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); + E1000_WRITE_REG(hw, E1000_CTRL_EXT, + ctrl_ext & ~E1000_CTRL_EXT_DRV_LOAD); +} + +/** + * igb_get_hw_control - get control of the h/w from f/w + * @adapter: address of board private structure + * + * igb_get_hw_control sets CTRL_EXT:DRV_LOAD bit. + * For ASF and Pass Through versions of f/w this means that + * the driver is loaded. + * + **/ +static void igb_get_hw_control(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 ctrl_ext; + + /* Let firmware know the driver has taken over */ + ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); + E1000_WRITE_REG(hw, E1000_CTRL_EXT, + ctrl_ext | E1000_CTRL_EXT_DRV_LOAD); +} + +/** + * igb_configure - configure the hardware for RX and TX + * @adapter: private board structure + **/ +static void igb_configure(struct igb_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + int i; + + igb_get_hw_control(adapter); + igb_set_rx_mode(netdev); + + igb_restore_vlan(adapter); + + igb_setup_tctl(adapter); + igb_setup_mrqc(adapter); + igb_setup_rctl(adapter); + + igb_configure_tx(adapter); + igb_configure_rx(adapter); + + e1000_rx_fifo_flush_82575(&adapter->hw); +#ifdef CONFIG_NETDEVICES_MULTIQUEUE + if (adapter->num_tx_queues > 1) + netdev->features |= NETIF_F_MULTI_QUEUE; + else + netdev->features &= ~NETIF_F_MULTI_QUEUE; +#endif + + /* call igb_desc_unused which always leaves + * at least 1 descriptor unused to make sure + * next_to_use != next_to_clean */ + for (i = 0; i < adapter->num_rx_queues; i++) { + struct igb_ring *ring = adapter->rx_ring[i]; + igb_alloc_rx_buffers(ring, igb_desc_unused(ring)); + } +} + +/** + * igb_power_up_link - Power up the phy/serdes link + * @adapter: address of board private structure + **/ +void igb_power_up_link(struct igb_adapter *adapter) +{ + e1000_phy_hw_reset(&adapter->hw); + + if (adapter->hw.phy.media_type == e1000_media_type_copper) + e1000_power_up_phy(&adapter->hw); + else + e1000_power_up_fiber_serdes_link(&adapter->hw); +} + +/** + * igb_power_down_link - Power down the phy/serdes link + * @adapter: address of board private structure + */ +static void igb_power_down_link(struct igb_adapter *adapter) +{ + if (adapter->hw.phy.media_type == e1000_media_type_copper) + e1000_power_down_phy(&adapter->hw); + else + e1000_shutdown_fiber_serdes_link(&adapter->hw); +} + +/* Detect and switch function for Media Auto Sense */ +static void igb_check_swap_media(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 ctrl_ext, connsw; + bool swap_now = false; + bool link; + + ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); + connsw = E1000_READ_REG(hw, E1000_CONNSW); + link = igb_has_link(adapter); + + /* need to live swap if current media is copper and we have fiber/serdes + * to go to. + */ + + if ((hw->phy.media_type == e1000_media_type_copper) && + (!(connsw & E1000_CONNSW_AUTOSENSE_EN))) { + swap_now = true; + } else if (!(connsw & E1000_CONNSW_SERDESD)) { + /* copper signal takes time to appear */ + if (adapter->copper_tries < 2) { + adapter->copper_tries++; + connsw |= E1000_CONNSW_AUTOSENSE_CONF; + E1000_WRITE_REG(hw, E1000_CONNSW, connsw); + return; + } else { + adapter->copper_tries = 0; + if ((connsw & E1000_CONNSW_PHYSD) && + (!(connsw & E1000_CONNSW_PHY_PDN))) { + swap_now = true; + connsw &= ~E1000_CONNSW_AUTOSENSE_CONF; + E1000_WRITE_REG(hw, E1000_CONNSW, connsw); + } + } + } + + if (swap_now) { + switch (hw->phy.media_type) { + case e1000_media_type_copper: + dev_info(pci_dev_to_dev(adapter->pdev), + "%s:MAS: changing media to fiber/serdes\n", + adapter->netdev->name); + ctrl_ext |= + E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES; + adapter->flags |= IGB_FLAG_MEDIA_RESET; + adapter->copper_tries = 0; + break; + case e1000_media_type_internal_serdes: + case e1000_media_type_fiber: + dev_info(pci_dev_to_dev(adapter->pdev), + "%s:MAS: changing media to copper\n", + adapter->netdev->name); + ctrl_ext &= + ~E1000_CTRL_EXT_LINK_MODE_PCIE_SERDES; + adapter->flags |= IGB_FLAG_MEDIA_RESET; + break; + default: + /* shouldn't get here during regular operation */ + dev_err(pci_dev_to_dev(adapter->pdev), + "%s:AMS: Invalid media type found, returning\n", + adapter->netdev->name); + break; + } + E1000_WRITE_REG(hw, E1000_CTRL_EXT, ctrl_ext); + } +} + +#ifdef HAVE_I2C_SUPPORT +/* igb_get_i2c_data - Reads the I2C SDA data bit + * @hw: pointer to hardware structure + * @i2cctl: Current value of I2CCTL register + * + * Returns the I2C data bit value + */ +static int igb_get_i2c_data(void *data) +{ + struct igb_adapter *adapter = (struct igb_adapter *)data; + struct e1000_hw *hw = &adapter->hw; + s32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); + + return (i2cctl & E1000_I2C_DATA_IN) != 0; +} + +/* igb_set_i2c_data - Sets the I2C data bit + * @data: pointer to hardware structure + * @state: I2C data value (0 or 1) to set + * + * Sets the I2C data bit + */ +static void igb_set_i2c_data(void *data, int state) +{ + struct igb_adapter *adapter = (struct igb_adapter *)data; + struct e1000_hw *hw = &adapter->hw; + s32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); + + if (state) + i2cctl |= E1000_I2C_DATA_OUT; + else + i2cctl &= ~E1000_I2C_DATA_OUT; + + i2cctl &= ~E1000_I2C_DATA_OE_N; + i2cctl |= E1000_I2C_CLK_OE_N; + + E1000_WRITE_REG(hw, E1000_I2CPARAMS, i2cctl); + E1000_WRITE_FLUSH(hw); + +} + +/* igb_set_i2c_clk - Sets the I2C SCL clock + * @data: pointer to hardware structure + * @state: state to set clock + * + * Sets the I2C clock line to state + */ +static void igb_set_i2c_clk(void *data, int state) +{ + struct igb_adapter *adapter = (struct igb_adapter *)data; + struct e1000_hw *hw = &adapter->hw; + s32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); + + if (state) { + i2cctl |= E1000_I2C_CLK_OUT; + i2cctl &= ~E1000_I2C_CLK_OE_N; + } else { + i2cctl &= ~E1000_I2C_CLK_OUT; + i2cctl &= ~E1000_I2C_CLK_OE_N; + } + E1000_WRITE_REG(hw, E1000_I2CPARAMS, i2cctl); + E1000_WRITE_FLUSH(hw); +} + +/* igb_get_i2c_clk - Gets the I2C SCL clock state + * @data: pointer to hardware structure + * + * Gets the I2C clock state + */ +static int igb_get_i2c_clk(void *data) +{ + struct igb_adapter *adapter = (struct igb_adapter *)data; + struct e1000_hw *hw = &adapter->hw; + s32 i2cctl = E1000_READ_REG(hw, E1000_I2CPARAMS); + + return (i2cctl & E1000_I2C_CLK_IN) != 0; +} + +static const struct i2c_algo_bit_data igb_i2c_algo = { + .setsda = igb_set_i2c_data, + .setscl = igb_set_i2c_clk, + .getsda = igb_get_i2c_data, + .getscl = igb_get_i2c_clk, + .udelay = 5, + .timeout = 20, +}; + +/* igb_init_i2c - Init I2C interface + * @adapter: pointer to adapter structure + * + */ +static s32 igb_init_i2c(struct igb_adapter *adapter) +{ + s32 status = E1000_SUCCESS; + + /* I2C interface supported on i350 devices */ + if (adapter->hw.mac.type != e1000_i350) + return E1000_SUCCESS; + + /* Initialize the i2c bus which is controlled by the registers. + * This bus will use the i2c_algo_bit structue that implements + * the protocol through toggling of the 4 bits in the register. + */ + adapter->i2c_adap.owner = THIS_MODULE; + adapter->i2c_algo = igb_i2c_algo; + adapter->i2c_algo.data = adapter; + adapter->i2c_adap.algo_data = &adapter->i2c_algo; + adapter->i2c_adap.dev.parent = &adapter->pdev->dev; + strlcpy(adapter->i2c_adap.name, "igb BB", + sizeof(adapter->i2c_adap.name)); + status = i2c_bit_add_bus(&adapter->i2c_adap); + return status; +} + +#endif /* HAVE_I2C_SUPPORT */ +/** + * igb_up - Open the interface and prepare it to handle traffic + * @adapter: board private structure + **/ +int igb_up(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + int i; + + /* hardware has been reset, we need to reload some things */ + igb_configure(adapter); + + clear_bit(__IGB_DOWN, &adapter->state); + + for (i = 0; i < adapter->num_q_vectors; i++) + napi_enable(&(adapter->q_vector[i]->napi)); + + if (adapter->msix_entries) + igb_configure_msix(adapter); + else + igb_assign_vector(adapter->q_vector[0], 0); + + igb_configure_lli(adapter); + + /* Clear any pending interrupts. */ + E1000_READ_REG(hw, E1000_ICR); + igb_irq_enable(adapter); + + /* notify VFs that reset has been completed */ + if (adapter->vfs_allocated_count) { + u32 reg_data = E1000_READ_REG(hw, E1000_CTRL_EXT); + reg_data |= E1000_CTRL_EXT_PFRSTD; + E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg_data); + } + + netif_tx_start_all_queues(adapter->netdev); + + if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA) + schedule_work(&adapter->dma_err_task); + /* start the watchdog. */ + hw->mac.get_link_status = 1; + schedule_work(&adapter->watchdog_task); + + if ((adapter->flags & IGB_FLAG_EEE) && + (!hw->dev_spec._82575.eee_disable)) + adapter->eee_advert = MDIO_EEE_100TX | MDIO_EEE_1000T; + + return 0; +} + +void igb_down(struct igb_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + struct e1000_hw *hw = &adapter->hw; + u32 tctl, rctl; + int i; + + /* signal that we're down so the interrupt handler does not + * reschedule our watchdog timer */ + set_bit(__IGB_DOWN, &adapter->state); + + /* disable receives in the hardware */ + rctl = E1000_READ_REG(hw, E1000_RCTL); + E1000_WRITE_REG(hw, E1000_RCTL, rctl & ~E1000_RCTL_EN); + /* flush and sleep below */ + + netif_tx_stop_all_queues(netdev); + + /* disable transmits in the hardware */ + tctl = E1000_READ_REG(hw, E1000_TCTL); + tctl &= ~E1000_TCTL_EN; + E1000_WRITE_REG(hw, E1000_TCTL, tctl); + /* flush both disables and wait for them to finish */ + E1000_WRITE_FLUSH(hw); + usleep_range(10000, 20000); + + for (i = 0; i < adapter->num_q_vectors; i++) + napi_disable(&(adapter->q_vector[i]->napi)); + + igb_irq_disable(adapter); + + adapter->flags &= ~IGB_FLAG_NEED_LINK_UPDATE; + + del_timer_sync(&adapter->watchdog_timer); + if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA) + del_timer_sync(&adapter->dma_err_timer); + del_timer_sync(&adapter->phy_info_timer); + + netif_carrier_off(netdev); + + /* record the stats before reset*/ + igb_update_stats(adapter); + + adapter->link_speed = 0; + adapter->link_duplex = 0; + +#ifdef HAVE_PCI_ERS + if (!pci_channel_offline(adapter->pdev)) + igb_reset(adapter); +#else + igb_reset(adapter); +#endif + igb_clean_all_tx_rings(adapter); + igb_clean_all_rx_rings(adapter); +#ifdef IGB_DCA + /* since we reset the hardware DCA settings were cleared */ + igb_setup_dca(adapter); +#endif +} + +void igb_reinit_locked(struct igb_adapter *adapter) +{ + WARN_ON(in_interrupt()); + while (test_and_set_bit(__IGB_RESETTING, &adapter->state)) + usleep_range(1000, 2000); + igb_down(adapter); + igb_up(adapter); + clear_bit(__IGB_RESETTING, &adapter->state); +} + +/** + * igb_enable_mas - Media Autosense re-enable after swap + * + * @adapter: adapter struct + **/ +static s32 igb_enable_mas(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 connsw; + s32 ret_val = E1000_SUCCESS; + + connsw = E1000_READ_REG(hw, E1000_CONNSW); + if (hw->phy.media_type == e1000_media_type_copper) { + /* configure for SerDes media detect */ + if (!(connsw & E1000_CONNSW_SERDESD)) { + connsw |= E1000_CONNSW_ENRGSRC; + connsw |= E1000_CONNSW_AUTOSENSE_EN; + E1000_WRITE_REG(hw, E1000_CONNSW, connsw); + E1000_WRITE_FLUSH(hw); + } else if (connsw & E1000_CONNSW_SERDESD) { + /* already SerDes, no need to enable anything */ + return ret_val; + } else { + dev_info(pci_dev_to_dev(adapter->pdev), + "%s:MAS: Unable to configure feature, disabling..\n", + adapter->netdev->name); + adapter->flags &= ~IGB_FLAG_MAS_ENABLE; + } + } + return ret_val; +} + +void igb_reset(struct igb_adapter *adapter) +{ + struct pci_dev *pdev = adapter->pdev; + struct e1000_hw *hw = &adapter->hw; + struct e1000_mac_info *mac = &hw->mac; + struct e1000_fc_info *fc = &hw->fc; + u32 pba = 0, tx_space, min_tx_space, min_rx_space, hwm; + + /* Repartition Pba for greater than 9k mtu + * To take effect CTRL.RST is required. + */ + switch (mac->type) { + case e1000_i350: + case e1000_82580: + case e1000_i354: + pba = E1000_READ_REG(hw, E1000_RXPBS); + pba = e1000_rxpbs_adjust_82580(pba); + break; + case e1000_82576: + pba = E1000_READ_REG(hw, E1000_RXPBS); + pba &= E1000_RXPBS_SIZE_MASK_82576; + break; + case e1000_82575: + case e1000_i210: + case e1000_i211: + default: + pba = E1000_PBA_34K; + break; + } + + if ((adapter->max_frame_size > ETH_FRAME_LEN + ETH_FCS_LEN) && + (mac->type < e1000_82576)) { + /* adjust PBA for jumbo frames */ + E1000_WRITE_REG(hw, E1000_PBA, pba); + + /* To maintain wire speed transmits, the Tx FIFO should be + * large enough to accommodate two full transmit packets, + * rounded up to the next 1KB and expressed in KB. Likewise, + * the Rx FIFO should be large enough to accommodate at least + * one full receive packet and is similarly rounded up and + * expressed in KB. */ + pba = E1000_READ_REG(hw, E1000_PBA); + /* upper 16 bits has Tx packet buffer allocation size in KB */ + tx_space = pba >> 16; + /* lower 16 bits has Rx packet buffer allocation size in KB */ + pba &= 0xffff; + /* the tx fifo also stores 16 bytes of information about the tx + * but don't include ethernet FCS because hardware appends it */ + min_tx_space = (adapter->max_frame_size + + sizeof(union e1000_adv_tx_desc) - + ETH_FCS_LEN) * 2; + min_tx_space = ALIGN(min_tx_space, 1024); + min_tx_space >>= 10; + /* software strips receive CRC, so leave room for it */ + min_rx_space = adapter->max_frame_size; + min_rx_space = ALIGN(min_rx_space, 1024); + min_rx_space >>= 10; + + /* If current Tx allocation is less than the min Tx FIFO size, + * and the min Tx FIFO size is less than the current Rx FIFO + * allocation, take space away from current Rx allocation */ + if (tx_space < min_tx_space && + ((min_tx_space - tx_space) < pba)) { + pba = pba - (min_tx_space - tx_space); + + /* if short on rx space, rx wins and must trump tx + * adjustment */ + if (pba < min_rx_space) + pba = min_rx_space; + } + E1000_WRITE_REG(hw, E1000_PBA, pba); + } + + /* flow control settings */ + /* The high water mark must be low enough to fit one full frame + * (or the size used for early receive) above it in the Rx FIFO. + * Set it to the lower of: + * - 90% of the Rx FIFO size, or + * - the full Rx FIFO size minus one full frame */ + hwm = min(((pba << 10) * 9 / 10), + ((pba << 10) - 2 * adapter->max_frame_size)); + + fc->high_water = hwm & 0xFFFFFFF0; /* 16-byte granularity */ + fc->low_water = fc->high_water - 16; + fc->pause_time = 0xFFFF; + fc->send_xon = 1; + fc->current_mode = fc->requested_mode; + + /* disable receive for all VFs and wait one second */ + if (adapter->vfs_allocated_count) { + int i; + /* + * Clear all flags except indication that the PF has set + * the VF MAC addresses administratively + */ + for (i = 0 ; i < adapter->vfs_allocated_count; i++) + adapter->vf_data[i].flags &= IGB_VF_FLAG_PF_SET_MAC; + + /* ping all the active vfs to let them know we are going down */ + igb_ping_all_vfs(adapter); + + /* disable transmits and receives */ + E1000_WRITE_REG(hw, E1000_VFRE, 0); + E1000_WRITE_REG(hw, E1000_VFTE, 0); + } + + /* Allow time for pending master requests to run */ + e1000_reset_hw(hw); + E1000_WRITE_REG(hw, E1000_WUC, 0); + + if (adapter->flags & IGB_FLAG_MEDIA_RESET) { + e1000_setup_init_funcs(hw, TRUE); + igb_check_options(adapter); + e1000_get_bus_info(hw); + adapter->flags &= ~IGB_FLAG_MEDIA_RESET; + } + if (adapter->flags & IGB_FLAG_MAS_ENABLE) { + if (igb_enable_mas(adapter)) + dev_err(pci_dev_to_dev(pdev), + "Error enabling Media Auto Sense\n"); + } + if (e1000_init_hw(hw)) + dev_err(pci_dev_to_dev(pdev), "Hardware Error\n"); + + /* + * Flow control settings reset on hardware reset, so guarantee flow + * control is off when forcing speed. + */ + if (!hw->mac.autoneg) + e1000_force_mac_fc(hw); + + igb_init_dmac(adapter, pba); + /* Re-initialize the thermal sensor on i350 devices. */ + if (mac->type == e1000_i350 && hw->bus.func == 0) { + /* + * If present, re-initialize the external thermal sensor + * interface. + */ + if (adapter->ets) + e1000_set_i2c_bb(hw); + e1000_init_thermal_sensor_thresh(hw); + } + + /*Re-establish EEE setting */ + if (hw->phy.media_type == e1000_media_type_copper) { + switch (mac->type) { + case e1000_i350: + case e1000_i210: + case e1000_i211: + e1000_set_eee_i350(hw); + break; + case e1000_i354: + e1000_set_eee_i354(hw); + break; + default: + break; + } + } + + if (!netif_running(adapter->netdev)) + igb_power_down_link(adapter); + + igb_update_mng_vlan(adapter); + + /* Enable h/w to recognize an 802.1Q VLAN Ethernet packet */ + E1000_WRITE_REG(hw, E1000_VET, ETHERNET_IEEE_VLAN_TYPE); + + +#ifdef HAVE_PTP_1588_CLOCK + /* Re-enable PTP, where applicable. */ + igb_ptp_reset(adapter); +#endif /* HAVE_PTP_1588_CLOCK */ + + e1000_get_phy_info(hw); + + adapter->devrc++; +} + +#ifdef HAVE_NDO_SET_FEATURES +static kni_netdev_features_t igb_fix_features(struct net_device *netdev, + kni_netdev_features_t features) +{ + /* + * Since there is no support for separate tx vlan accel + * enabled make sure tx flag is cleared if rx is. + */ +#ifdef NETIF_F_HW_VLAN_CTAG_RX + if (!(features & NETIF_F_HW_VLAN_CTAG_RX)) + features &= ~NETIF_F_HW_VLAN_CTAG_TX; +#else + if (!(features & NETIF_F_HW_VLAN_RX)) + features &= ~NETIF_F_HW_VLAN_TX; +#endif + + /* If Rx checksum is disabled, then LRO should also be disabled */ + if (!(features & NETIF_F_RXCSUM)) + features &= ~NETIF_F_LRO; + + return features; +} + +static int igb_set_features(struct net_device *netdev, + kni_netdev_features_t features) +{ + u32 changed = netdev->features ^ features; + +#ifdef NETIF_F_HW_VLAN_CTAG_RX + if (changed & NETIF_F_HW_VLAN_CTAG_RX) +#else + if (changed & NETIF_F_HW_VLAN_RX) +#endif + igb_vlan_mode(netdev, features); + + return 0; +} + +#ifdef NTF_SELF +#ifdef USE_CONST_DEV_UC_CHAR +static int igb_ndo_fdb_add(struct ndmsg *ndm, struct nlattr *tb[], + struct net_device *dev, + const unsigned char *addr, +#ifdef HAVE_NDO_FDB_ADD_VID + u16 vid, +#endif + u16 flags) +#else +static int igb_ndo_fdb_add(struct ndmsg *ndm, + struct net_device *dev, + unsigned char *addr, + u16 flags) +#endif +{ + struct igb_adapter *adapter = netdev_priv(dev); + struct e1000_hw *hw = &adapter->hw; + int err; + + if (!(adapter->vfs_allocated_count)) + return -EOPNOTSUPP; + + /* Hardware does not support aging addresses so if a + * ndm_state is given only allow permanent addresses + */ + if (ndm->ndm_state && !(ndm->ndm_state & NUD_PERMANENT)) { + pr_info("%s: FDB only supports static addresses\n", + igb_driver_name); + return -EINVAL; + } + + if (is_unicast_ether_addr(addr) || is_link_local_ether_addr(addr)) { + u32 rar_uc_entries = hw->mac.rar_entry_count - + (adapter->vfs_allocated_count + 1); + + if (netdev_uc_count(dev) < rar_uc_entries) + err = dev_uc_add_excl(dev, addr); + else + err = -ENOMEM; + } else if (is_multicast_ether_addr(addr)) { + err = dev_mc_add_excl(dev, addr); + } else { + err = -EINVAL; + } + + /* Only return duplicate errors if NLM_F_EXCL is set */ + if (err == -EEXIST && !(flags & NLM_F_EXCL)) + err = 0; + + return err; +} + +#ifndef USE_DEFAULT_FDB_DEL_DUMP +#ifdef USE_CONST_DEV_UC_CHAR +static int igb_ndo_fdb_del(struct ndmsg *ndm, + struct net_device *dev, + const unsigned char *addr) +#else +static int igb_ndo_fdb_del(struct ndmsg *ndm, + struct net_device *dev, + unsigned char *addr) +#endif +{ + struct igb_adapter *adapter = netdev_priv(dev); + int err = -EOPNOTSUPP; + + if (ndm->ndm_state & NUD_PERMANENT) { + pr_info("%s: FDB only supports static addresses\n", + igb_driver_name); + return -EINVAL; + } + + if (adapter->vfs_allocated_count) { + if (is_unicast_ether_addr(addr)) + err = dev_uc_del(dev, addr); + else if (is_multicast_ether_addr(addr)) + err = dev_mc_del(dev, addr); + else + err = -EINVAL; + } + + return err; +} + +static int igb_ndo_fdb_dump(struct sk_buff *skb, + struct netlink_callback *cb, + struct net_device *dev, + int idx) +{ + struct igb_adapter *adapter = netdev_priv(dev); + + if (adapter->vfs_allocated_count) + idx = ndo_dflt_fdb_dump(skb, cb, dev, idx); + + return idx; +} +#endif /* USE_DEFAULT_FDB_DEL_DUMP */ + +#ifdef HAVE_BRIDGE_ATTRIBS +#ifdef HAVE_NDO_BRIDGE_SET_DEL_LINK_FLAGS +static int igb_ndo_bridge_setlink(struct net_device *dev, + struct nlmsghdr *nlh, + u16 flags) +#else +static int igb_ndo_bridge_setlink(struct net_device *dev, + struct nlmsghdr *nlh) +#endif /* HAVE_NDO_BRIDGE_SET_DEL_LINK_FLAGS */ +{ + struct igb_adapter *adapter = netdev_priv(dev); + struct e1000_hw *hw = &adapter->hw; + struct nlattr *attr, *br_spec; + int rem; + + if (!(adapter->vfs_allocated_count)) + return -EOPNOTSUPP; + + switch (adapter->hw.mac.type) { + case e1000_82576: + case e1000_i350: + case e1000_i354: + break; + default: + return -EOPNOTSUPP; + } + + br_spec = nlmsg_find_attr(nlh, sizeof(struct ifinfomsg), IFLA_AF_SPEC); + + nla_for_each_nested(attr, br_spec, rem) { + __u16 mode; + + if (nla_type(attr) != IFLA_BRIDGE_MODE) + continue; + + mode = nla_get_u16(attr); + if (mode == BRIDGE_MODE_VEPA) { + e1000_vmdq_set_loopback_pf(hw, 0); + adapter->flags &= ~IGB_FLAG_LOOPBACK_ENABLE; + } else if (mode == BRIDGE_MODE_VEB) { + e1000_vmdq_set_loopback_pf(hw, 1); + adapter->flags |= IGB_FLAG_LOOPBACK_ENABLE; + } else + return -EINVAL; + + netdev_info(adapter->netdev, "enabling bridge mode: %s\n", + mode == BRIDGE_MODE_VEPA ? "VEPA" : "VEB"); + } + + return 0; +} + +#ifdef HAVE_BRIDGE_FILTER +#ifdef HAVE_NDO_BRIDGE_GETLINK_NLFLAGS +static int igb_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, + struct net_device *dev, u32 filter_mask, + int nlflags) +#else +static int igb_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, + struct net_device *dev, u32 filter_mask) +#endif /* HAVE_NDO_BRIDGE_GETLINK_NLFLAGS */ +#else +static int igb_ndo_bridge_getlink(struct sk_buff *skb, u32 pid, u32 seq, + struct net_device *dev) +#endif +{ + struct igb_adapter *adapter = netdev_priv(dev); + u16 mode; + + if (!(adapter->vfs_allocated_count)) + return -EOPNOTSUPP; + + if (adapter->flags & IGB_FLAG_LOOPBACK_ENABLE) + mode = BRIDGE_MODE_VEB; + else + mode = BRIDGE_MODE_VEPA; + +#ifdef HAVE_NDO_DFLT_BRIDGE_ADD_MASK +#ifdef HAVE_NDO_BRIDGE_GETLINK_NLFLAGS +#ifdef HAVE_NDO_BRIDGE_GETLINK_FILTER_MASK_VLAN_FILL + return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode, 0, 0, + nlflags, filter_mask, NULL); +#else + return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode, 0, 0, nlflags); +#endif /* HAVE_NDO_BRIDGE_GETLINK_FILTER_MASK_VLAN_FILL */ +#else + return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode, 0, 0); +#endif /* HAVE_NDO_BRIDGE_GETLINK_NLFLAGS */ +#else + return ndo_dflt_bridge_getlink(skb, pid, seq, dev, mode); +#endif /* HAVE_NDO_DFLT_BRIDGE_ADD_MASK */ +} +#endif /* HAVE_BRIDGE_ATTRIBS */ +#endif /* NTF_SELF */ + +#endif /* HAVE_NDO_SET_FEATURES */ +#ifdef HAVE_NET_DEVICE_OPS +static const struct net_device_ops igb_netdev_ops = { + .ndo_open = igb_open, + .ndo_stop = igb_close, + .ndo_start_xmit = igb_xmit_frame, + .ndo_get_stats = igb_get_stats, + .ndo_set_rx_mode = igb_set_rx_mode, + .ndo_set_mac_address = igb_set_mac, + .ndo_change_mtu = igb_change_mtu, + .ndo_do_ioctl = igb_ioctl, + .ndo_tx_timeout = igb_tx_timeout, + .ndo_validate_addr = eth_validate_addr, + .ndo_vlan_rx_add_vid = igb_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = igb_vlan_rx_kill_vid, +#ifdef IFLA_VF_MAX + .ndo_set_vf_mac = igb_ndo_set_vf_mac, + .ndo_set_vf_vlan = igb_ndo_set_vf_vlan, +#ifdef HAVE_VF_MIN_MAX_TXRATE + .ndo_set_vf_rate = igb_ndo_set_vf_bw, +#else /* HAVE_VF_MIN_MAX_TXRATE */ + .ndo_set_vf_tx_rate = igb_ndo_set_vf_bw, +#endif /* HAVE_VF_MIN_MAX_TXRATE */ + .ndo_get_vf_config = igb_ndo_get_vf_config, +#ifdef HAVE_VF_SPOOFCHK_CONFIGURE + .ndo_set_vf_spoofchk = igb_ndo_set_vf_spoofchk, +#endif /* HAVE_VF_SPOOFCHK_CONFIGURE */ +#endif /* IFLA_VF_MAX */ +#ifdef CONFIG_NET_POLL_CONTROLLER + .ndo_poll_controller = igb_netpoll, +#endif +#ifdef HAVE_NDO_SET_FEATURES + .ndo_fix_features = igb_fix_features, + .ndo_set_features = igb_set_features, +#endif +#ifdef HAVE_VLAN_RX_REGISTER + .ndo_vlan_rx_register = igb_vlan_mode, +#endif +#ifndef HAVE_RHEL6_NETDEV_OPS_EXT_FDB +#ifdef NTF_SELF + .ndo_fdb_add = igb_ndo_fdb_add, +#ifndef USE_DEFAULT_FDB_DEL_DUMP + .ndo_fdb_del = igb_ndo_fdb_del, + .ndo_fdb_dump = igb_ndo_fdb_dump, +#endif +#endif /* ! HAVE_RHEL6_NETDEV_OPS_EXT_FDB */ +#ifdef HAVE_BRIDGE_ATTRIBS + .ndo_bridge_setlink = igb_ndo_bridge_setlink, + .ndo_bridge_getlink = igb_ndo_bridge_getlink, +#endif /* HAVE_BRIDGE_ATTRIBS */ +#endif +}; + +#ifdef CONFIG_IGB_VMDQ_NETDEV +static const struct net_device_ops igb_vmdq_ops = { + .ndo_open = &igb_vmdq_open, + .ndo_stop = &igb_vmdq_close, + .ndo_start_xmit = &igb_vmdq_xmit_frame, + .ndo_get_stats = &igb_vmdq_get_stats, + .ndo_set_rx_mode = &igb_vmdq_set_rx_mode, + .ndo_validate_addr = eth_validate_addr, + .ndo_set_mac_address = &igb_vmdq_set_mac, + .ndo_change_mtu = &igb_vmdq_change_mtu, + .ndo_tx_timeout = &igb_vmdq_tx_timeout, + .ndo_vlan_rx_register = &igb_vmdq_vlan_rx_register, + .ndo_vlan_rx_add_vid = &igb_vmdq_vlan_rx_add_vid, + .ndo_vlan_rx_kill_vid = &igb_vmdq_vlan_rx_kill_vid, +}; + +#endif /* CONFIG_IGB_VMDQ_NETDEV */ +#endif /* HAVE_NET_DEVICE_OPS */ +#ifdef CONFIG_IGB_VMDQ_NETDEV +void igb_assign_vmdq_netdev_ops(struct net_device *vnetdev) +{ +#ifdef HAVE_NET_DEVICE_OPS + vnetdev->netdev_ops = &igb_vmdq_ops; +#else + dev->open = &igb_vmdq_open; + dev->stop = &igb_vmdq_close; + dev->hard_start_xmit = &igb_vmdq_xmit_frame; + dev->get_stats = &igb_vmdq_get_stats; +#ifdef HAVE_SET_RX_MODE + dev->set_rx_mode = &igb_vmdq_set_rx_mode; +#endif + dev->set_multicast_list = &igb_vmdq_set_rx_mode; + dev->set_mac_address = &igb_vmdq_set_mac; + dev->change_mtu = &igb_vmdq_change_mtu; +#ifdef HAVE_TX_TIMEOUT + dev->tx_timeout = &igb_vmdq_tx_timeout; +#endif +#if defined(NETIF_F_HW_VLAN_TX) || defined(NETIF_F_HW_VLAN_CTAG_TX) + dev->vlan_rx_register = &igb_vmdq_vlan_rx_register; + dev->vlan_rx_add_vid = &igb_vmdq_vlan_rx_add_vid; + dev->vlan_rx_kill_vid = &igb_vmdq_vlan_rx_kill_vid; +#endif +#endif + igb_vmdq_set_ethtool_ops(vnetdev); + vnetdev->watchdog_timeo = 5 * HZ; + +} + +int igb_init_vmdq_netdevs(struct igb_adapter *adapter) +{ + int pool, err = 0, base_queue; + struct net_device *vnetdev; + struct igb_vmdq_adapter *vmdq_adapter; + + for (pool = 1; pool < adapter->vmdq_pools; pool++) { + int qpp = (!adapter->rss_queues ? 1 : adapter->rss_queues); + base_queue = pool * qpp; + vnetdev = alloc_etherdev(sizeof(struct igb_vmdq_adapter)); + if (!vnetdev) { + err = -ENOMEM; + break; + } + vmdq_adapter = netdev_priv(vnetdev); + vmdq_adapter->vnetdev = vnetdev; + vmdq_adapter->real_adapter = adapter; + vmdq_adapter->rx_ring = adapter->rx_ring[base_queue]; + vmdq_adapter->tx_ring = adapter->tx_ring[base_queue]; + igb_assign_vmdq_netdev_ops(vnetdev); + snprintf(vnetdev->name, IFNAMSIZ, "%sv%d", + adapter->netdev->name, pool); + vnetdev->features = adapter->netdev->features; +#ifdef HAVE_NETDEV_VLAN_FEATURES + vnetdev->vlan_features = adapter->netdev->vlan_features; +#endif + adapter->vmdq_netdev[pool-1] = vnetdev; + err = register_netdev(vnetdev); + if (err) + break; + } + return err; +} + +int igb_remove_vmdq_netdevs(struct igb_adapter *adapter) +{ + int pool, err = 0; + + for (pool = 1; pool < adapter->vmdq_pools; pool++) { + unregister_netdev(adapter->vmdq_netdev[pool-1]); + free_netdev(adapter->vmdq_netdev[pool-1]); + adapter->vmdq_netdev[pool-1] = NULL; + } + return err; +} +#endif /* CONFIG_IGB_VMDQ_NETDEV */ + +/** + * igb_set_fw_version - Configure version string for ethtool + * @adapter: adapter struct + * + **/ +static void igb_set_fw_version(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + struct e1000_fw_version fw; + + e1000_get_fw_version(hw, &fw); + + switch (hw->mac.type) { + case e1000_i210: + case e1000_i211: + if (!(e1000_get_flash_presence_i210(hw))) { + snprintf(adapter->fw_version, + sizeof(adapter->fw_version), + "%2d.%2d-%d", + fw.invm_major, fw.invm_minor, fw.invm_img_type); + break; + } + /* fall through */ + default: + /* if option rom is valid, display its version too*/ + if (fw.or_valid) { + snprintf(adapter->fw_version, + sizeof(adapter->fw_version), + "%d.%d, 0x%08x, %d.%d.%d", + fw.eep_major, fw.eep_minor, fw.etrack_id, + fw.or_major, fw.or_build, fw.or_patch); + /* no option rom */ + } else { + if (fw.etrack_id != 0X0000) { + snprintf(adapter->fw_version, + sizeof(adapter->fw_version), + "%d.%d, 0x%08x", + fw.eep_major, fw.eep_minor, fw.etrack_id); + } else { + snprintf(adapter->fw_version, + sizeof(adapter->fw_version), + "%d.%d.%d", + fw.eep_major, fw.eep_minor, fw.eep_build); + } + } + break; + } + + return; +} + +/** + * igb_init_mas - init Media Autosense feature if enabled in the NVM + * + * @adapter: adapter struct + **/ +static void igb_init_mas(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u16 eeprom_data; + + e1000_read_nvm(hw, NVM_COMPAT, 1, &eeprom_data); + switch (hw->bus.func) { + case E1000_FUNC_0: + if (eeprom_data & IGB_MAS_ENABLE_0) + adapter->flags |= IGB_FLAG_MAS_ENABLE; + break; + case E1000_FUNC_1: + if (eeprom_data & IGB_MAS_ENABLE_1) + adapter->flags |= IGB_FLAG_MAS_ENABLE; + break; + case E1000_FUNC_2: + if (eeprom_data & IGB_MAS_ENABLE_2) + adapter->flags |= IGB_FLAG_MAS_ENABLE; + break; + case E1000_FUNC_3: + if (eeprom_data & IGB_MAS_ENABLE_3) + adapter->flags |= IGB_FLAG_MAS_ENABLE; + break; + default: + /* Shouldn't get here */ + dev_err(pci_dev_to_dev(adapter->pdev), + "%s:AMS: Invalid port configuration, returning\n", + adapter->netdev->name); + break; + } +} + +/** + * igb_probe - Device Initialization Routine + * @pdev: PCI device information struct + * @ent: entry in igb_pci_tbl + * + * Returns 0 on success, negative on failure + * + * igb_probe initializes an adapter identified by a pci_dev structure. + * The OS initialization, configuring of the adapter private structure, + * and a hardware reset occur. + **/ +static int __devinit igb_probe(struct pci_dev *pdev, + const struct pci_device_id *ent) +{ + struct net_device *netdev; + struct igb_adapter *adapter; + struct e1000_hw *hw; + u16 eeprom_data = 0; + u8 pba_str[E1000_PBANUM_LENGTH]; + s32 ret_val; + static int global_quad_port_a; /* global quad port a indication */ + int i, err, pci_using_dac; + static int cards_found; + + err = pci_enable_device_mem(pdev); + if (err) + return err; + + pci_using_dac = 0; + err = dma_set_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(64)); + if (!err) { + err = dma_set_coherent_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(64)); + if (!err) + pci_using_dac = 1; + } else { + err = dma_set_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(32)); + if (err) { + err = dma_set_coherent_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(32)); + if (err) { + IGB_ERR("No usable DMA configuration, " + "aborting\n"); + goto err_dma; + } + } + } + +#ifndef HAVE_ASPM_QUIRKS + /* 82575 requires that the pci-e link partner disable the L0s state */ + switch (pdev->device) { + case E1000_DEV_ID_82575EB_COPPER: + case E1000_DEV_ID_82575EB_FIBER_SERDES: + case E1000_DEV_ID_82575GB_QUAD_COPPER: + pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S); + default: + break; + } + +#endif /* HAVE_ASPM_QUIRKS */ + err = pci_request_selected_regions(pdev, + pci_select_bars(pdev, + IORESOURCE_MEM), + igb_driver_name); + if (err) + goto err_pci_reg; + + pci_enable_pcie_error_reporting(pdev); + + pci_set_master(pdev); + + err = -ENOMEM; +#ifdef HAVE_TX_MQ + netdev = alloc_etherdev_mq(sizeof(struct igb_adapter), + IGB_MAX_TX_QUEUES); +#else + netdev = alloc_etherdev(sizeof(struct igb_adapter)); +#endif /* HAVE_TX_MQ */ + if (!netdev) + goto err_alloc_etherdev; + + SET_MODULE_OWNER(netdev); + SET_NETDEV_DEV(netdev, &pdev->dev); + + pci_set_drvdata(pdev, netdev); + adapter = netdev_priv(netdev); + adapter->netdev = netdev; + adapter->pdev = pdev; + hw = &adapter->hw; + hw->back = adapter; + adapter->port_num = hw->bus.func; + adapter->msg_enable = (1 << debug) - 1; + +#ifdef HAVE_PCI_ERS + err = pci_save_state(pdev); + if (err) + goto err_ioremap; +#endif + err = -EIO; + hw->hw_addr = ioremap(pci_resource_start(pdev, 0), + pci_resource_len(pdev, 0)); + if (!hw->hw_addr) + goto err_ioremap; + +#ifdef HAVE_NET_DEVICE_OPS + netdev->netdev_ops = &igb_netdev_ops; +#else /* HAVE_NET_DEVICE_OPS */ + netdev->open = &igb_open; + netdev->stop = &igb_close; + netdev->get_stats = &igb_get_stats; +#ifdef HAVE_SET_RX_MODE + netdev->set_rx_mode = &igb_set_rx_mode; +#endif + netdev->set_multicast_list = &igb_set_rx_mode; + netdev->set_mac_address = &igb_set_mac; + netdev->change_mtu = &igb_change_mtu; + netdev->do_ioctl = &igb_ioctl; +#ifdef HAVE_TX_TIMEOUT + netdev->tx_timeout = &igb_tx_timeout; +#endif + netdev->vlan_rx_register = igb_vlan_mode; + netdev->vlan_rx_add_vid = igb_vlan_rx_add_vid; + netdev->vlan_rx_kill_vid = igb_vlan_rx_kill_vid; +#ifdef CONFIG_NET_POLL_CONTROLLER + netdev->poll_controller = igb_netpoll; +#endif + netdev->hard_start_xmit = &igb_xmit_frame; +#endif /* HAVE_NET_DEVICE_OPS */ + igb_set_ethtool_ops(netdev); +#ifdef HAVE_TX_TIMEOUT + netdev->watchdog_timeo = 5 * HZ; +#endif + + strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1); + + adapter->bd_number = cards_found; + + /* setup the private structure */ + err = igb_sw_init(adapter); + if (err) + goto err_sw_init; + + e1000_get_bus_info(hw); + + hw->phy.autoneg_wait_to_complete = FALSE; + hw->mac.adaptive_ifs = FALSE; + + /* Copper options */ + if (hw->phy.media_type == e1000_media_type_copper) { + hw->phy.mdix = AUTO_ALL_MODES; + hw->phy.disable_polarity_correction = FALSE; + hw->phy.ms_type = e1000_ms_hw_default; + } + + if (e1000_check_reset_block(hw)) + dev_info(pci_dev_to_dev(pdev), + "PHY reset is blocked due to SOL/IDER session.\n"); + + /* + * features is initialized to 0 in allocation, it might have bits + * set by igb_sw_init so we should use an or instead of an + * assignment. + */ + netdev->features |= NETIF_F_SG | + NETIF_F_IP_CSUM | +#ifdef NETIF_F_IPV6_CSUM + NETIF_F_IPV6_CSUM | +#endif +#ifdef NETIF_F_TSO + NETIF_F_TSO | +#ifdef NETIF_F_TSO6 + NETIF_F_TSO6 | +#endif +#endif /* NETIF_F_TSO */ +#ifdef NETIF_F_RXHASH + NETIF_F_RXHASH | +#endif + NETIF_F_RXCSUM | +#ifdef NETIF_F_HW_VLAN_CTAG_RX + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_TX; +#else + NETIF_F_HW_VLAN_RX | + NETIF_F_HW_VLAN_TX; +#endif + + if (hw->mac.type >= e1000_82576) + netdev->features |= NETIF_F_SCTP_CSUM; + +#ifdef HAVE_NDO_SET_FEATURES + /* copy netdev features into list of user selectable features */ + netdev->hw_features |= netdev->features; +#ifndef IGB_NO_LRO + + /* give us the option of enabling LRO later */ + netdev->hw_features |= NETIF_F_LRO; +#endif +#else +#ifdef NETIF_F_GRO + + /* this is only needed on kernels prior to 2.6.39 */ + netdev->features |= NETIF_F_GRO; +#endif +#endif + + /* set this bit last since it cannot be part of hw_features */ +#ifdef NETIF_F_HW_VLAN_CTAG_FILTER + netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; +#else + netdev->features |= NETIF_F_HW_VLAN_FILTER; +#endif + +#ifdef HAVE_NETDEV_VLAN_FEATURES + netdev->vlan_features |= NETIF_F_TSO | + NETIF_F_TSO6 | + NETIF_F_IP_CSUM | + NETIF_F_IPV6_CSUM | + NETIF_F_SG; + +#endif + if (pci_using_dac) + netdev->features |= NETIF_F_HIGHDMA; + + adapter->en_mng_pt = e1000_enable_mng_pass_thru(hw); +#ifdef DEBUG + if (adapter->dmac != IGB_DMAC_DISABLE) + printk("%s: DMA Coalescing is enabled..\n", netdev->name); +#endif + + /* before reading the NVM, reset the controller to put the device in a + * known good starting state */ + e1000_reset_hw(hw); + + /* make sure the NVM is good */ + if (e1000_validate_nvm_checksum(hw) < 0) { + dev_err(pci_dev_to_dev(pdev), "The NVM Checksum Is Not" + " Valid\n"); + err = -EIO; + goto err_eeprom; + } + + /* copy the MAC address out of the NVM */ + if (e1000_read_mac_addr(hw)) + dev_err(pci_dev_to_dev(pdev), "NVM Read Error\n"); + memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len); +#ifdef ETHTOOL_GPERMADDR + memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len); + + if (!is_valid_ether_addr(netdev->perm_addr)) { +#else + if (!is_valid_ether_addr(netdev->dev_addr)) { +#endif + dev_err(pci_dev_to_dev(pdev), "Invalid MAC Address\n"); + err = -EIO; + goto err_eeprom; + } + + memcpy(&adapter->mac_table[0].addr, hw->mac.addr, netdev->addr_len); + adapter->mac_table[0].queue = adapter->vfs_allocated_count; + adapter->mac_table[0].state = (IGB_MAC_STATE_DEFAULT | IGB_MAC_STATE_IN_USE); + igb_rar_set(adapter, 0); + + /* get firmware version for ethtool -i */ + igb_set_fw_version(adapter); + + /* Check if Media Autosense is enabled */ + if (hw->mac.type == e1000_82580) + igb_init_mas(adapter); + setup_timer(&adapter->watchdog_timer, &igb_watchdog, + (unsigned long) adapter); + if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA) + setup_timer(&adapter->dma_err_timer, &igb_dma_err_timer, + (unsigned long) adapter); + setup_timer(&adapter->phy_info_timer, &igb_update_phy_info, + (unsigned long) adapter); + + INIT_WORK(&adapter->reset_task, igb_reset_task); + INIT_WORK(&adapter->watchdog_task, igb_watchdog_task); + if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA) + INIT_WORK(&adapter->dma_err_task, igb_dma_err_task); + + /* Initialize link properties that are user-changeable */ + adapter->fc_autoneg = true; + hw->mac.autoneg = true; + hw->phy.autoneg_advertised = 0x2f; + + hw->fc.requested_mode = e1000_fc_default; + hw->fc.current_mode = e1000_fc_default; + + e1000_validate_mdi_setting(hw); + + /* By default, support wake on port A */ + if (hw->bus.func == 0) + adapter->flags |= IGB_FLAG_WOL_SUPPORTED; + + /* Check the NVM for wake support for non-port A ports */ + if (hw->mac.type >= e1000_82580) + hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A + + NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1, + &eeprom_data); + else if (hw->bus.func == 1) + e1000_read_nvm(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); + + if (eeprom_data & IGB_EEPROM_APME) + adapter->flags |= IGB_FLAG_WOL_SUPPORTED; + + /* now that we have the eeprom settings, apply the special cases where + * the eeprom may be wrong or the board simply won't support wake on + * lan on a particular port */ + switch (pdev->device) { + case E1000_DEV_ID_82575GB_QUAD_COPPER: + adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED; + break; + case E1000_DEV_ID_82575EB_FIBER_SERDES: + case E1000_DEV_ID_82576_FIBER: + case E1000_DEV_ID_82576_SERDES: + /* Wake events only supported on port A for dual fiber + * regardless of eeprom setting */ + if (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_FUNC_1) + adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED; + break; + case E1000_DEV_ID_82576_QUAD_COPPER: + case E1000_DEV_ID_82576_QUAD_COPPER_ET2: + /* if quad port adapter, disable WoL on all but port A */ + if (global_quad_port_a != 0) + adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED; + else + adapter->flags |= IGB_FLAG_QUAD_PORT_A; + /* Reset for multiple quad port adapters */ + if (++global_quad_port_a == 4) + global_quad_port_a = 0; + break; + default: + /* If the device can't wake, don't set software support */ + if (!device_can_wakeup(&adapter->pdev->dev)) + adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED; + break; + } + + /* initialize the wol settings based on the eeprom settings */ + if (adapter->flags & IGB_FLAG_WOL_SUPPORTED) + adapter->wol |= E1000_WUFC_MAG; + + /* Some vendors want WoL disabled by default, but still supported */ + if ((hw->mac.type == e1000_i350) && + (pdev->subsystem_vendor == PCI_VENDOR_ID_HP)) { + adapter->flags |= IGB_FLAG_WOL_SUPPORTED; + adapter->wol = 0; + } + + device_set_wakeup_enable(pci_dev_to_dev(adapter->pdev), + adapter->flags & IGB_FLAG_WOL_SUPPORTED); + + /* reset the hardware with the new settings */ + igb_reset(adapter); + adapter->devrc = 0; + +#ifdef HAVE_I2C_SUPPORT + /* Init the I2C interface */ + err = igb_init_i2c(adapter); + if (err) { + dev_err(&pdev->dev, "failed to init i2c interface\n"); + goto err_eeprom; + } +#endif /* HAVE_I2C_SUPPORT */ + + /* let the f/w know that the h/w is now under the control of the + * driver. */ + igb_get_hw_control(adapter); + + strncpy(netdev->name, "eth%d", IFNAMSIZ); + err = register_netdev(netdev); + if (err) + goto err_register; + +#ifdef CONFIG_IGB_VMDQ_NETDEV + err = igb_init_vmdq_netdevs(adapter); + if (err) + goto err_register; +#endif + /* carrier off reporting is important to ethtool even BEFORE open */ + netif_carrier_off(netdev); + +#ifdef IGB_DCA + if (dca_add_requester(&pdev->dev) == E1000_SUCCESS) { + adapter->flags |= IGB_FLAG_DCA_ENABLED; + dev_info(pci_dev_to_dev(pdev), "DCA enabled\n"); + igb_setup_dca(adapter); + } + +#endif +#ifdef HAVE_PTP_1588_CLOCK + /* do hw tstamp init after resetting */ + igb_ptp_init(adapter); +#endif /* HAVE_PTP_1588_CLOCK */ + + dev_info(pci_dev_to_dev(pdev), "Intel(R) Gigabit Ethernet Network Connection\n"); + /* print bus type/speed/width info */ + dev_info(pci_dev_to_dev(pdev), "%s: (PCIe:%s:%s) ", + netdev->name, + ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5GT/s" : + (hw->bus.speed == e1000_bus_speed_5000) ? "5.0GT/s" : + (hw->mac.type == e1000_i354) ? "integrated" : + "unknown"), + ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" : + (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" : + (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" : + (hw->mac.type == e1000_i354) ? "integrated" : + "unknown")); + dev_info(pci_dev_to_dev(pdev), "%s: MAC: ", netdev->name); + for (i = 0; i < 6; i++) + printk("%2.2x%c", netdev->dev_addr[i], i == 5 ? '\n' : ':'); + + ret_val = e1000_read_pba_string(hw, pba_str, E1000_PBANUM_LENGTH); + if (ret_val) + strncpy(pba_str, "Unknown", sizeof(pba_str) - 1); + dev_info(pci_dev_to_dev(pdev), "%s: PBA No: %s\n", netdev->name, + pba_str); + + + /* Initialize the thermal sensor on i350 devices. */ + if (hw->mac.type == e1000_i350) { + if (hw->bus.func == 0) { + u16 ets_word; + + /* + * Read the NVM to determine if this i350 device + * supports an external thermal sensor. + */ + e1000_read_nvm(hw, NVM_ETS_CFG, 1, &ets_word); + if (ets_word != 0x0000 && ets_word != 0xFFFF) + adapter->ets = true; + else + adapter->ets = false; + } +#ifdef IGB_HWMON + + igb_sysfs_init(adapter); +#else +#ifdef IGB_PROCFS + + igb_procfs_init(adapter); +#endif /* IGB_PROCFS */ +#endif /* IGB_HWMON */ + } else { + adapter->ets = false; + } + + if (hw->phy.media_type == e1000_media_type_copper) { + switch (hw->mac.type) { + case e1000_i350: + case e1000_i210: + case e1000_i211: + /* Enable EEE for internal copper PHY devices */ + err = e1000_set_eee_i350(hw); + if ((!err) && + (adapter->flags & IGB_FLAG_EEE)) + adapter->eee_advert = + MDIO_EEE_100TX | MDIO_EEE_1000T; + break; + case e1000_i354: + if ((E1000_READ_REG(hw, E1000_CTRL_EXT)) & + (E1000_CTRL_EXT_LINK_MODE_SGMII)) { + err = e1000_set_eee_i354(hw); + if ((!err) && + (adapter->flags & IGB_FLAG_EEE)) + adapter->eee_advert = + MDIO_EEE_100TX | MDIO_EEE_1000T; + } + break; + default: + break; + } + } + + /* send driver version info to firmware */ + if (hw->mac.type >= e1000_i350) + igb_init_fw(adapter); + +#ifndef IGB_NO_LRO + if (netdev->features & NETIF_F_LRO) + dev_info(pci_dev_to_dev(pdev), "Internal LRO is enabled \n"); + else + dev_info(pci_dev_to_dev(pdev), "LRO is disabled \n"); +#endif + dev_info(pci_dev_to_dev(pdev), + "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n", + adapter->msix_entries ? "MSI-X" : + (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy", + adapter->num_rx_queues, adapter->num_tx_queues); + + cards_found++; + + pm_runtime_put_noidle(&pdev->dev); + return 0; + +err_register: + igb_release_hw_control(adapter); +#ifdef HAVE_I2C_SUPPORT + memset(&adapter->i2c_adap, 0, sizeof(adapter->i2c_adap)); +#endif /* HAVE_I2C_SUPPORT */ +err_eeprom: + if (!e1000_check_reset_block(hw)) + e1000_phy_hw_reset(hw); + + if (hw->flash_address) + iounmap(hw->flash_address); +err_sw_init: + igb_clear_interrupt_scheme(adapter); + igb_reset_sriov_capability(adapter); + iounmap(hw->hw_addr); +err_ioremap: + free_netdev(netdev); +err_alloc_etherdev: + pci_release_selected_regions(pdev, + pci_select_bars(pdev, IORESOURCE_MEM)); +err_pci_reg: +err_dma: + pci_disable_device(pdev); + return err; +} +#ifdef HAVE_I2C_SUPPORT +/* + * igb_remove_i2c - Cleanup I2C interface + * @adapter: pointer to adapter structure + * + */ +static void igb_remove_i2c(struct igb_adapter *adapter) +{ + + /* free the adapter bus structure */ + i2c_del_adapter(&adapter->i2c_adap); +} +#endif /* HAVE_I2C_SUPPORT */ + +/** + * igb_remove - Device Removal Routine + * @pdev: PCI device information struct + * + * igb_remove is called by the PCI subsystem to alert the driver + * that it should release a PCI device. The could be caused by a + * Hot-Plug event, or because the driver is going to be removed from + * memory. + **/ +static void __devexit igb_remove(struct pci_dev *pdev) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + + pm_runtime_get_noresume(&pdev->dev); +#ifdef HAVE_I2C_SUPPORT + igb_remove_i2c(adapter); +#endif /* HAVE_I2C_SUPPORT */ +#ifdef HAVE_PTP_1588_CLOCK + igb_ptp_stop(adapter); +#endif /* HAVE_PTP_1588_CLOCK */ + + /* flush_scheduled work may reschedule our watchdog task, so + * explicitly disable watchdog tasks from being rescheduled */ + set_bit(__IGB_DOWN, &adapter->state); + del_timer_sync(&adapter->watchdog_timer); + if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA) + del_timer_sync(&adapter->dma_err_timer); + del_timer_sync(&adapter->phy_info_timer); + + flush_scheduled_work(); + +#ifdef IGB_DCA + if (adapter->flags & IGB_FLAG_DCA_ENABLED) { + dev_info(pci_dev_to_dev(pdev), "DCA disabled\n"); + dca_remove_requester(&pdev->dev); + adapter->flags &= ~IGB_FLAG_DCA_ENABLED; + E1000_WRITE_REG(hw, E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_DISABLE); + } +#endif + + /* Release control of h/w to f/w. If f/w is AMT enabled, this + * would have already happened in close and is redundant. */ + igb_release_hw_control(adapter); + + unregister_netdev(netdev); +#ifdef CONFIG_IGB_VMDQ_NETDEV + igb_remove_vmdq_netdevs(adapter); +#endif + + igb_clear_interrupt_scheme(adapter); + igb_reset_sriov_capability(adapter); + + iounmap(hw->hw_addr); + if (hw->flash_address) + iounmap(hw->flash_address); + pci_release_selected_regions(pdev, + pci_select_bars(pdev, IORESOURCE_MEM)); + +#ifdef IGB_HWMON + igb_sysfs_exit(adapter); +#else +#ifdef IGB_PROCFS + igb_procfs_exit(adapter); +#endif /* IGB_PROCFS */ +#endif /* IGB_HWMON */ + kfree(adapter->mac_table); + kfree(adapter->shadow_vfta); + free_netdev(netdev); + + pci_disable_pcie_error_reporting(pdev); + + pci_disable_device(pdev); +} + +/** + * igb_sw_init - Initialize general software structures (struct igb_adapter) + * @adapter: board private structure to initialize + * + * igb_sw_init initializes the Adapter private data structure. + * Fields are initialized based on PCI device information and + * OS network device settings (MTU size). + **/ +static int igb_sw_init(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + struct net_device *netdev = adapter->netdev; + struct pci_dev *pdev = adapter->pdev; + + /* PCI config space info */ + + hw->vendor_id = pdev->vendor; + hw->device_id = pdev->device; + hw->subsystem_vendor_id = pdev->subsystem_vendor; + hw->subsystem_device_id = pdev->subsystem_device; + + pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id); + + pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word); + + /* set default ring sizes */ + adapter->tx_ring_count = IGB_DEFAULT_TXD; + adapter->rx_ring_count = IGB_DEFAULT_RXD; + + /* set default work limits */ + adapter->tx_work_limit = IGB_DEFAULT_TX_WORK; + + adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + + VLAN_HLEN; + + /* Initialize the hardware-specific values */ + if (e1000_setup_init_funcs(hw, TRUE)) { + dev_err(pci_dev_to_dev(pdev), "Hardware Initialization Failure\n"); + return -EIO; + } + + adapter->mac_table = kzalloc(sizeof(struct igb_mac_addr) * + hw->mac.rar_entry_count, + GFP_ATOMIC); + + /* Setup and initialize a copy of the hw vlan table array */ + adapter->shadow_vfta = kzalloc(sizeof(u32) * E1000_VFTA_ENTRIES, + GFP_ATOMIC); +#ifdef NO_KNI + /* These calls may decrease the number of queues */ + if (hw->mac.type < e1000_i210) { + igb_set_sriov_capability(adapter); + } + + if (igb_init_interrupt_scheme(adapter, true)) { + dev_err(pci_dev_to_dev(pdev), "Unable to allocate memory for queues\n"); + return -ENOMEM; + } + + /* Explicitly disable IRQ since the NIC can be in any state. */ + igb_irq_disable(adapter); + + set_bit(__IGB_DOWN, &adapter->state); +#endif + return 0; +} + +/** + * igb_open - Called when a network interface is made active + * @netdev: network interface device structure + * + * Returns 0 on success, negative value on failure + * + * The open entry point is called when a network interface is made + * active by the system (IFF_UP). At this point all resources needed + * for transmit and receive operations are allocated, the interrupt + * handler is registered with the OS, the watchdog timer is started, + * and the stack is notified that the interface is ready. + **/ +static int __igb_open(struct net_device *netdev, bool resuming) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; +#ifdef CONFIG_PM_RUNTIME + struct pci_dev *pdev = adapter->pdev; +#endif /* CONFIG_PM_RUNTIME */ + int err; + int i; + + /* disallow open during test */ + if (test_bit(__IGB_TESTING, &adapter->state)) { + WARN_ON(resuming); + return -EBUSY; + } + +#ifdef CONFIG_PM_RUNTIME + if (!resuming) + pm_runtime_get_sync(&pdev->dev); +#endif /* CONFIG_PM_RUNTIME */ + + netif_carrier_off(netdev); + + /* allocate transmit descriptors */ + err = igb_setup_all_tx_resources(adapter); + if (err) + goto err_setup_tx; + + /* allocate receive descriptors */ + err = igb_setup_all_rx_resources(adapter); + if (err) + goto err_setup_rx; + + igb_power_up_link(adapter); + + /* before we allocate an interrupt, we must be ready to handle it. + * Setting DEBUG_SHIRQ in the kernel makes it fire an interrupt + * as soon as we call pci_request_irq, so we have to setup our + * clean_rx handler before we do so. */ + igb_configure(adapter); + + err = igb_request_irq(adapter); + if (err) + goto err_req_irq; + + /* Notify the stack of the actual queue counts. */ + netif_set_real_num_tx_queues(netdev, + adapter->vmdq_pools ? 1 : + adapter->num_tx_queues); + + err = netif_set_real_num_rx_queues(netdev, + adapter->vmdq_pools ? 1 : + adapter->num_rx_queues); + if (err) + goto err_set_queues; + + /* From here on the code is the same as igb_up() */ + clear_bit(__IGB_DOWN, &adapter->state); + + for (i = 0; i < adapter->num_q_vectors; i++) + napi_enable(&(adapter->q_vector[i]->napi)); + igb_configure_lli(adapter); + + /* Clear any pending interrupts. */ + E1000_READ_REG(hw, E1000_ICR); + + igb_irq_enable(adapter); + + /* notify VFs that reset has been completed */ + if (adapter->vfs_allocated_count) { + u32 reg_data = E1000_READ_REG(hw, E1000_CTRL_EXT); + reg_data |= E1000_CTRL_EXT_PFRSTD; + E1000_WRITE_REG(hw, E1000_CTRL_EXT, reg_data); + } + + netif_tx_start_all_queues(netdev); + + if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA) + schedule_work(&adapter->dma_err_task); + + /* start the watchdog. */ + hw->mac.get_link_status = 1; + schedule_work(&adapter->watchdog_task); + + return E1000_SUCCESS; + +err_set_queues: + igb_free_irq(adapter); +err_req_irq: + igb_release_hw_control(adapter); + igb_power_down_link(adapter); + igb_free_all_rx_resources(adapter); +err_setup_rx: + igb_free_all_tx_resources(adapter); +err_setup_tx: + igb_reset(adapter); + +#ifdef CONFIG_PM_RUNTIME + if (!resuming) + pm_runtime_put(&pdev->dev); +#endif /* CONFIG_PM_RUNTIME */ + + return err; +} + +static int igb_open(struct net_device *netdev) +{ + return __igb_open(netdev, false); +} + +/** + * igb_close - Disables a network interface + * @netdev: network interface device structure + * + * Returns 0, this is not allowed to fail + * + * The close entry point is called when an interface is de-activated + * by the OS. The hardware is still under the driver's control, but + * needs to be disabled. A global MAC reset is issued to stop the + * hardware, and all transmit and receive resources are freed. + **/ +static int __igb_close(struct net_device *netdev, bool suspending) +{ + struct igb_adapter *adapter = netdev_priv(netdev); +#ifdef CONFIG_PM_RUNTIME + struct pci_dev *pdev = adapter->pdev; +#endif /* CONFIG_PM_RUNTIME */ + + WARN_ON(test_bit(__IGB_RESETTING, &adapter->state)); + +#ifdef CONFIG_PM_RUNTIME + if (!suspending) + pm_runtime_get_sync(&pdev->dev); +#endif /* CONFIG_PM_RUNTIME */ + + igb_down(adapter); + + igb_release_hw_control(adapter); + + igb_free_irq(adapter); + + igb_free_all_tx_resources(adapter); + igb_free_all_rx_resources(adapter); + +#ifdef CONFIG_PM_RUNTIME + if (!suspending) + pm_runtime_put_sync(&pdev->dev); +#endif /* CONFIG_PM_RUNTIME */ + + return 0; +} + +static int igb_close(struct net_device *netdev) +{ + return __igb_close(netdev, false); +} + +/** + * igb_setup_tx_resources - allocate Tx resources (Descriptors) + * @tx_ring: tx descriptor ring (for a specific queue) to setup + * + * Return 0 on success, negative on failure + **/ +int igb_setup_tx_resources(struct igb_ring *tx_ring) +{ + struct device *dev = tx_ring->dev; + int size; + + size = sizeof(struct igb_tx_buffer) * tx_ring->count; + tx_ring->tx_buffer_info = vzalloc(size); + if (!tx_ring->tx_buffer_info) + goto err; + + /* round up to nearest 4K */ + tx_ring->size = tx_ring->count * sizeof(union e1000_adv_tx_desc); + tx_ring->size = ALIGN(tx_ring->size, 4096); + + tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size, + &tx_ring->dma, GFP_KERNEL); + + if (!tx_ring->desc) + goto err; + + tx_ring->next_to_use = 0; + tx_ring->next_to_clean = 0; + + return 0; + +err: + vfree(tx_ring->tx_buffer_info); + dev_err(dev, + "Unable to allocate memory for the transmit descriptor ring\n"); + return -ENOMEM; +} + +/** + * igb_setup_all_tx_resources - wrapper to allocate Tx resources + * (Descriptors) for all queues + * @adapter: board private structure + * + * Return 0 on success, negative on failure + **/ +static int igb_setup_all_tx_resources(struct igb_adapter *adapter) +{ + struct pci_dev *pdev = adapter->pdev; + int i, err = 0; + + for (i = 0; i < adapter->num_tx_queues; i++) { + err = igb_setup_tx_resources(adapter->tx_ring[i]); + if (err) { + dev_err(pci_dev_to_dev(pdev), + "Allocation for Tx Queue %u failed\n", i); + for (i--; i >= 0; i--) + igb_free_tx_resources(adapter->tx_ring[i]); + break; + } + } + + return err; +} + +/** + * igb_setup_tctl - configure the transmit control registers + * @adapter: Board private structure + **/ +void igb_setup_tctl(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 tctl; + + /* disable queue 0 which is enabled by default on 82575 and 82576 */ + E1000_WRITE_REG(hw, E1000_TXDCTL(0), 0); + + /* Program the Transmit Control Register */ + tctl = E1000_READ_REG(hw, E1000_TCTL); + tctl &= ~E1000_TCTL_CT; + tctl |= E1000_TCTL_PSP | E1000_TCTL_RTLC | + (E1000_COLLISION_THRESHOLD << E1000_CT_SHIFT); + + e1000_config_collision_dist(hw); + + /* Enable transmits */ + tctl |= E1000_TCTL_EN; + + E1000_WRITE_REG(hw, E1000_TCTL, tctl); +} + +static u32 igb_tx_wthresh(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + switch (hw->mac.type) { + case e1000_i354: + return 4; + case e1000_82576: + if (adapter->msix_entries) + return 1; + default: + break; + } + + return 16; +} + +/** + * igb_configure_tx_ring - Configure transmit ring after Reset + * @adapter: board private structure + * @ring: tx ring to configure + * + * Configure a transmit ring after a reset. + **/ +void igb_configure_tx_ring(struct igb_adapter *adapter, + struct igb_ring *ring) +{ + struct e1000_hw *hw = &adapter->hw; + u32 txdctl = 0; + u64 tdba = ring->dma; + int reg_idx = ring->reg_idx; + + /* disable the queue */ + E1000_WRITE_REG(hw, E1000_TXDCTL(reg_idx), 0); + E1000_WRITE_FLUSH(hw); + mdelay(10); + + E1000_WRITE_REG(hw, E1000_TDLEN(reg_idx), + ring->count * sizeof(union e1000_adv_tx_desc)); + E1000_WRITE_REG(hw, E1000_TDBAL(reg_idx), + tdba & 0x00000000ffffffffULL); + E1000_WRITE_REG(hw, E1000_TDBAH(reg_idx), tdba >> 32); + + ring->tail = hw->hw_addr + E1000_TDT(reg_idx); + E1000_WRITE_REG(hw, E1000_TDH(reg_idx), 0); + writel(0, ring->tail); + + txdctl |= IGB_TX_PTHRESH; + txdctl |= IGB_TX_HTHRESH << 8; + txdctl |= igb_tx_wthresh(adapter) << 16; + + txdctl |= E1000_TXDCTL_QUEUE_ENABLE; + E1000_WRITE_REG(hw, E1000_TXDCTL(reg_idx), txdctl); +} + +/** + * igb_configure_tx - Configure transmit Unit after Reset + * @adapter: board private structure + * + * Configure the Tx unit of the MAC after a reset. + **/ +static void igb_configure_tx(struct igb_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_tx_queues; i++) + igb_configure_tx_ring(adapter, adapter->tx_ring[i]); +} + +/** + * igb_setup_rx_resources - allocate Rx resources (Descriptors) + * @rx_ring: rx descriptor ring (for a specific queue) to setup + * + * Returns 0 on success, negative on failure + **/ +int igb_setup_rx_resources(struct igb_ring *rx_ring) +{ + struct device *dev = rx_ring->dev; + int size, desc_len; + + size = sizeof(struct igb_rx_buffer) * rx_ring->count; + rx_ring->rx_buffer_info = vzalloc(size); + if (!rx_ring->rx_buffer_info) + goto err; + + desc_len = sizeof(union e1000_adv_rx_desc); + + /* Round up to nearest 4K */ + rx_ring->size = rx_ring->count * desc_len; + rx_ring->size = ALIGN(rx_ring->size, 4096); + + rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size, + &rx_ring->dma, GFP_KERNEL); + + if (!rx_ring->desc) + goto err; + + rx_ring->next_to_alloc = 0; + rx_ring->next_to_clean = 0; + rx_ring->next_to_use = 0; + + return 0; + +err: + vfree(rx_ring->rx_buffer_info); + rx_ring->rx_buffer_info = NULL; + dev_err(dev, "Unable to allocate memory for the receive descriptor" + " ring\n"); + return -ENOMEM; +} + +/** + * igb_setup_all_rx_resources - wrapper to allocate Rx resources + * (Descriptors) for all queues + * @adapter: board private structure + * + * Return 0 on success, negative on failure + **/ +static int igb_setup_all_rx_resources(struct igb_adapter *adapter) +{ + struct pci_dev *pdev = adapter->pdev; + int i, err = 0; + + for (i = 0; i < adapter->num_rx_queues; i++) { + err = igb_setup_rx_resources(adapter->rx_ring[i]); + if (err) { + dev_err(pci_dev_to_dev(pdev), + "Allocation for Rx Queue %u failed\n", i); + for (i--; i >= 0; i--) + igb_free_rx_resources(adapter->rx_ring[i]); + break; + } + } + + return err; +} + +/** + * igb_setup_mrqc - configure the multiple receive queue control registers + * @adapter: Board private structure + **/ +static void igb_setup_mrqc(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 mrqc, rxcsum; + u32 j, num_rx_queues, shift = 0, shift2 = 0; + static const u32 rsskey[10] = { 0xDA565A6D, 0xC20E5B25, 0x3D256741, + 0xB08FA343, 0xCB2BCAD0, 0xB4307BAE, + 0xA32DCB77, 0x0CF23080, 0x3BB7426A, + 0xFA01ACBE }; + + /* Fill out hash function seeds */ + for (j = 0; j < 10; j++) + E1000_WRITE_REG(hw, E1000_RSSRK(j), rsskey[j]); + + num_rx_queues = adapter->rss_queues; + + /* 82575 and 82576 supports 2 RSS queues for VMDq */ + switch (hw->mac.type) { + case e1000_82575: + if (adapter->vmdq_pools) { + shift = 2; + shift2 = 6; + break; + } + shift = 6; + break; + case e1000_82576: + /* 82576 supports 2 RSS queues for SR-IOV */ + if (adapter->vfs_allocated_count || adapter->vmdq_pools) { + shift = 3; + num_rx_queues = 2; + } + break; + default: + break; + } + + /* + * Populate the redirection table 4 entries at a time. To do this + * we are generating the results for n and n+2 and then interleaving + * those with the results with n+1 and n+3. + */ + for (j = 0; j < 32; j++) { + /* first pass generates n and n+2 */ + u32 base = ((j * 0x00040004) + 0x00020000) * num_rx_queues; + u32 reta = (base & 0x07800780) >> (7 - shift); + + /* second pass generates n+1 and n+3 */ + base += 0x00010001 * num_rx_queues; + reta |= (base & 0x07800780) << (1 + shift); + + /* generate 2nd table for 82575 based parts */ + if (shift2) + reta |= (0x01010101 * num_rx_queues) << shift2; + + E1000_WRITE_REG(hw, E1000_RETA(j), reta); + } + + /* + * Disable raw packet checksumming so that RSS hash is placed in + * descriptor on writeback. No need to enable TCP/UDP/IP checksum + * offloads as they are enabled by default + */ + rxcsum = E1000_READ_REG(hw, E1000_RXCSUM); + rxcsum |= E1000_RXCSUM_PCSD; + + if (adapter->hw.mac.type >= e1000_82576) + /* Enable Receive Checksum Offload for SCTP */ + rxcsum |= E1000_RXCSUM_CRCOFL; + + /* Don't need to set TUOFL or IPOFL, they default to 1 */ + E1000_WRITE_REG(hw, E1000_RXCSUM, rxcsum); + + /* Generate RSS hash based on packet types, TCP/UDP + * port numbers and/or IPv4/v6 src and dst addresses + */ + mrqc = E1000_MRQC_RSS_FIELD_IPV4 | + E1000_MRQC_RSS_FIELD_IPV4_TCP | + E1000_MRQC_RSS_FIELD_IPV6 | + E1000_MRQC_RSS_FIELD_IPV6_TCP | + E1000_MRQC_RSS_FIELD_IPV6_TCP_EX; + + if (adapter->flags & IGB_FLAG_RSS_FIELD_IPV4_UDP) + mrqc |= E1000_MRQC_RSS_FIELD_IPV4_UDP; + if (adapter->flags & IGB_FLAG_RSS_FIELD_IPV6_UDP) + mrqc |= E1000_MRQC_RSS_FIELD_IPV6_UDP; + + /* If VMDq is enabled then we set the appropriate mode for that, else + * we default to RSS so that an RSS hash is calculated per packet even + * if we are only using one queue */ + if (adapter->vfs_allocated_count || adapter->vmdq_pools) { + if (hw->mac.type > e1000_82575) { + /* Set the default pool for the PF's first queue */ + u32 vtctl = E1000_READ_REG(hw, E1000_VT_CTL); + vtctl &= ~(E1000_VT_CTL_DEFAULT_POOL_MASK | + E1000_VT_CTL_DISABLE_DEF_POOL); + vtctl |= adapter->vfs_allocated_count << + E1000_VT_CTL_DEFAULT_POOL_SHIFT; + E1000_WRITE_REG(hw, E1000_VT_CTL, vtctl); + } else if (adapter->rss_queues > 1) { + /* set default queue for pool 1 to queue 2 */ + E1000_WRITE_REG(hw, E1000_VT_CTL, + adapter->rss_queues << 7); + } + if (adapter->rss_queues > 1) + mrqc |= E1000_MRQC_ENABLE_VMDQ_RSS_2Q; + else + mrqc |= E1000_MRQC_ENABLE_VMDQ; + } else { + mrqc |= E1000_MRQC_ENABLE_RSS_4Q; + } + igb_vmm_control(adapter); + + E1000_WRITE_REG(hw, E1000_MRQC, mrqc); +} + +/** + * igb_setup_rctl - configure the receive control registers + * @adapter: Board private structure + **/ +void igb_setup_rctl(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 rctl; + + rctl = E1000_READ_REG(hw, E1000_RCTL); + + rctl &= ~(3 << E1000_RCTL_MO_SHIFT); + rctl &= ~(E1000_RCTL_LBM_TCVR | E1000_RCTL_LBM_MAC); + + rctl |= E1000_RCTL_EN | E1000_RCTL_BAM | E1000_RCTL_RDMTS_HALF | + (hw->mac.mc_filter_type << E1000_RCTL_MO_SHIFT); + + /* + * enable stripping of CRC. It's unlikely this will break BMC + * redirection as it did with e1000. Newer features require + * that the HW strips the CRC. + */ + rctl |= E1000_RCTL_SECRC; + + /* disable store bad packets and clear size bits. */ + rctl &= ~(E1000_RCTL_SBP | E1000_RCTL_SZ_256); + + /* enable LPE to prevent packets larger than max_frame_size */ + rctl |= E1000_RCTL_LPE; + + /* disable queue 0 to prevent tail write w/o re-config */ + E1000_WRITE_REG(hw, E1000_RXDCTL(0), 0); + + /* Attention!!! For SR-IOV PF driver operations you must enable + * queue drop for all VF and PF queues to prevent head of line blocking + * if an un-trusted VF does not provide descriptors to hardware. + */ + if (adapter->vfs_allocated_count) { + /* set all queue drop enable bits */ + E1000_WRITE_REG(hw, E1000_QDE, ALL_QUEUES); + } + + E1000_WRITE_REG(hw, E1000_RCTL, rctl); +} + +static inline int igb_set_vf_rlpml(struct igb_adapter *adapter, int size, + int vfn) +{ + struct e1000_hw *hw = &adapter->hw; + u32 vmolr; + + /* if it isn't the PF check to see if VFs are enabled and + * increase the size to support vlan tags */ + if (vfn < adapter->vfs_allocated_count && + adapter->vf_data[vfn].vlans_enabled) + size += VLAN_HLEN; + +#ifdef CONFIG_IGB_VMDQ_NETDEV + if (vfn >= adapter->vfs_allocated_count) { + int queue = vfn - adapter->vfs_allocated_count; + struct igb_vmdq_adapter *vadapter; + + vadapter = netdev_priv(adapter->vmdq_netdev[queue-1]); + if (vadapter->vlgrp) + size += VLAN_HLEN; + } +#endif + vmolr = E1000_READ_REG(hw, E1000_VMOLR(vfn)); + vmolr &= ~E1000_VMOLR_RLPML_MASK; + vmolr |= size | E1000_VMOLR_LPE; + E1000_WRITE_REG(hw, E1000_VMOLR(vfn), vmolr); + + return 0; +} + +/** + * igb_rlpml_set - set maximum receive packet size + * @adapter: board private structure + * + * Configure maximum receivable packet size. + **/ +static void igb_rlpml_set(struct igb_adapter *adapter) +{ + u32 max_frame_size = adapter->max_frame_size; + struct e1000_hw *hw = &adapter->hw; + u16 pf_id = adapter->vfs_allocated_count; + + if (adapter->vmdq_pools && hw->mac.type != e1000_82575) { + int i; + for (i = 0; i < adapter->vmdq_pools; i++) + igb_set_vf_rlpml(adapter, max_frame_size, pf_id + i); + /* + * If we're in VMDQ or SR-IOV mode, then set global RLPML + * to our max jumbo frame size, in case we need to enable + * jumbo frames on one of the rings later. + * This will not pass over-length frames into the default + * queue because it's gated by the VMOLR.RLPML. + */ + max_frame_size = MAX_JUMBO_FRAME_SIZE; + } + /* Set VF RLPML for the PF device. */ + if (adapter->vfs_allocated_count) + igb_set_vf_rlpml(adapter, max_frame_size, pf_id); + + E1000_WRITE_REG(hw, E1000_RLPML, max_frame_size); +} + +static inline void igb_set_vf_vlan_strip(struct igb_adapter *adapter, + int vfn, bool enable) +{ + struct e1000_hw *hw = &adapter->hw; + u32 val; + void __iomem *reg; + + if (hw->mac.type < e1000_82576) + return; + + if (hw->mac.type == e1000_i350) + reg = hw->hw_addr + E1000_DVMOLR(vfn); + else + reg = hw->hw_addr + E1000_VMOLR(vfn); + + val = readl(reg); + if (enable) + val |= E1000_VMOLR_STRVLAN; + else + val &= ~(E1000_VMOLR_STRVLAN); + writel(val, reg); +} +static inline void igb_set_vmolr(struct igb_adapter *adapter, + int vfn, bool aupe) +{ + struct e1000_hw *hw = &adapter->hw; + u32 vmolr; + + /* + * This register exists only on 82576 and newer so if we are older then + * we should exit and do nothing + */ + if (hw->mac.type < e1000_82576) + return; + + vmolr = E1000_READ_REG(hw, E1000_VMOLR(vfn)); + + if (aupe) + vmolr |= E1000_VMOLR_AUPE; /* Accept untagged packets */ + else + vmolr &= ~(E1000_VMOLR_AUPE); /* Tagged packets ONLY */ + + /* clear all bits that might not be set */ + vmolr &= ~E1000_VMOLR_RSSE; + + if (adapter->rss_queues > 1 && vfn == adapter->vfs_allocated_count) + vmolr |= E1000_VMOLR_RSSE; /* enable RSS */ + + vmolr |= E1000_VMOLR_BAM; /* Accept broadcast */ + vmolr |= E1000_VMOLR_LPE; /* Accept long packets */ + + E1000_WRITE_REG(hw, E1000_VMOLR(vfn), vmolr); +} + +/** + * igb_configure_rx_ring - Configure a receive ring after Reset + * @adapter: board private structure + * @ring: receive ring to be configured + * + * Configure the Rx unit of the MAC after a reset. + **/ +void igb_configure_rx_ring(struct igb_adapter *adapter, + struct igb_ring *ring) +{ + struct e1000_hw *hw = &adapter->hw; + u64 rdba = ring->dma; + int reg_idx = ring->reg_idx; + u32 srrctl = 0, rxdctl = 0; + +#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT + /* + * RLPML prevents us from receiving a frame larger than max_frame so + * it is safe to just set the rx_buffer_len to max_frame without the + * risk of an skb over panic. + */ + ring->rx_buffer_len = max_t(u32, adapter->max_frame_size, + MAXIMUM_ETHERNET_VLAN_SIZE); + +#endif + /* disable the queue */ + E1000_WRITE_REG(hw, E1000_RXDCTL(reg_idx), 0); + + /* Set DMA base address registers */ + E1000_WRITE_REG(hw, E1000_RDBAL(reg_idx), + rdba & 0x00000000ffffffffULL); + E1000_WRITE_REG(hw, E1000_RDBAH(reg_idx), rdba >> 32); + E1000_WRITE_REG(hw, E1000_RDLEN(reg_idx), + ring->count * sizeof(union e1000_adv_rx_desc)); + + /* initialize head and tail */ + ring->tail = hw->hw_addr + E1000_RDT(reg_idx); + E1000_WRITE_REG(hw, E1000_RDH(reg_idx), 0); + writel(0, ring->tail); + + /* reset next-to- use/clean to place SW in sync with hardwdare */ + ring->next_to_clean = 0; + ring->next_to_use = 0; +#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT + ring->next_to_alloc = 0; + +#endif + /* set descriptor configuration */ +#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT + srrctl = IGB_RX_HDR_LEN << E1000_SRRCTL_BSIZEHDRSIZE_SHIFT; + srrctl |= IGB_RX_BUFSZ >> E1000_SRRCTL_BSIZEPKT_SHIFT; +#else /* CONFIG_IGB_DISABLE_PACKET_SPLIT */ + srrctl = ALIGN(ring->rx_buffer_len, 1024) >> + E1000_SRRCTL_BSIZEPKT_SHIFT; +#endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */ + srrctl |= E1000_SRRCTL_DESCTYPE_ADV_ONEBUF; +#ifdef HAVE_PTP_1588_CLOCK + if (hw->mac.type >= e1000_82580) + srrctl |= E1000_SRRCTL_TIMESTAMP; +#endif /* HAVE_PTP_1588_CLOCK */ + /* + * We should set the drop enable bit if: + * SR-IOV is enabled + * or + * Flow Control is disabled and number of RX queues > 1 + * + * This allows us to avoid head of line blocking for security + * and performance reasons. + */ + if (adapter->vfs_allocated_count || + (adapter->num_rx_queues > 1 && + (hw->fc.requested_mode == e1000_fc_none || + hw->fc.requested_mode == e1000_fc_rx_pause))) + srrctl |= E1000_SRRCTL_DROP_EN; + + E1000_WRITE_REG(hw, E1000_SRRCTL(reg_idx), srrctl); + + /* set filtering for VMDQ pools */ + igb_set_vmolr(adapter, reg_idx & 0x7, true); + + rxdctl |= IGB_RX_PTHRESH; + rxdctl |= IGB_RX_HTHRESH << 8; + rxdctl |= IGB_RX_WTHRESH << 16; + + /* enable receive descriptor fetching */ + rxdctl |= E1000_RXDCTL_QUEUE_ENABLE; + E1000_WRITE_REG(hw, E1000_RXDCTL(reg_idx), rxdctl); +} + +/** + * igb_configure_rx - Configure receive Unit after Reset + * @adapter: board private structure + * + * Configure the Rx unit of the MAC after a reset. + **/ +static void igb_configure_rx(struct igb_adapter *adapter) +{ + int i; + + /* set UTA to appropriate mode */ + igb_set_uta(adapter); + + igb_full_sync_mac_table(adapter); + /* Setup the HW Rx Head and Tail Descriptor Pointers and + * the Base and Length of the Rx Descriptor Ring */ + for (i = 0; i < adapter->num_rx_queues; i++) + igb_configure_rx_ring(adapter, adapter->rx_ring[i]); +} + +/** + * igb_free_tx_resources - Free Tx Resources per Queue + * @tx_ring: Tx descriptor ring for a specific queue + * + * Free all transmit software resources + **/ +void igb_free_tx_resources(struct igb_ring *tx_ring) +{ + igb_clean_tx_ring(tx_ring); + + vfree(tx_ring->tx_buffer_info); + tx_ring->tx_buffer_info = NULL; + + /* if not set, then don't free */ + if (!tx_ring->desc) + return; + + dma_free_coherent(tx_ring->dev, tx_ring->size, + tx_ring->desc, tx_ring->dma); + + tx_ring->desc = NULL; +} + +/** + * igb_free_all_tx_resources - Free Tx Resources for All Queues + * @adapter: board private structure + * + * Free all transmit software resources + **/ +static void igb_free_all_tx_resources(struct igb_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_tx_queues; i++) + igb_free_tx_resources(adapter->tx_ring[i]); +} + +void igb_unmap_and_free_tx_resource(struct igb_ring *ring, + struct igb_tx_buffer *tx_buffer) +{ + if (tx_buffer->skb) { + dev_kfree_skb_any(tx_buffer->skb); + if (dma_unmap_len(tx_buffer, len)) + dma_unmap_single(ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + } else if (dma_unmap_len(tx_buffer, len)) { + dma_unmap_page(ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + } + tx_buffer->next_to_watch = NULL; + tx_buffer->skb = NULL; + dma_unmap_len_set(tx_buffer, len, 0); + /* buffer_info must be completely set up in the transmit path */ +} + +/** + * igb_clean_tx_ring - Free Tx Buffers + * @tx_ring: ring to be cleaned + **/ +static void igb_clean_tx_ring(struct igb_ring *tx_ring) +{ + struct igb_tx_buffer *buffer_info; + unsigned long size; + u16 i; + + if (!tx_ring->tx_buffer_info) + return; + /* Free all the Tx ring sk_buffs */ + + for (i = 0; i < tx_ring->count; i++) { + buffer_info = &tx_ring->tx_buffer_info[i]; + igb_unmap_and_free_tx_resource(tx_ring, buffer_info); + } + + netdev_tx_reset_queue(txring_txq(tx_ring)); + + size = sizeof(struct igb_tx_buffer) * tx_ring->count; + memset(tx_ring->tx_buffer_info, 0, size); + + /* Zero out the descriptor ring */ + memset(tx_ring->desc, 0, tx_ring->size); + + tx_ring->next_to_use = 0; + tx_ring->next_to_clean = 0; +} + +/** + * igb_clean_all_tx_rings - Free Tx Buffers for all queues + * @adapter: board private structure + **/ +static void igb_clean_all_tx_rings(struct igb_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_tx_queues; i++) + igb_clean_tx_ring(adapter->tx_ring[i]); +} + +/** + * igb_free_rx_resources - Free Rx Resources + * @rx_ring: ring to clean the resources from + * + * Free all receive software resources + **/ +void igb_free_rx_resources(struct igb_ring *rx_ring) +{ + igb_clean_rx_ring(rx_ring); + + vfree(rx_ring->rx_buffer_info); + rx_ring->rx_buffer_info = NULL; + + /* if not set, then don't free */ + if (!rx_ring->desc) + return; + + dma_free_coherent(rx_ring->dev, rx_ring->size, + rx_ring->desc, rx_ring->dma); + + rx_ring->desc = NULL; +} + +/** + * igb_free_all_rx_resources - Free Rx Resources for All Queues + * @adapter: board private structure + * + * Free all receive software resources + **/ +static void igb_free_all_rx_resources(struct igb_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_rx_queues; i++) + igb_free_rx_resources(adapter->rx_ring[i]); +} + +/** + * igb_clean_rx_ring - Free Rx Buffers per Queue + * @rx_ring: ring to free buffers from + **/ +void igb_clean_rx_ring(struct igb_ring *rx_ring) +{ + unsigned long size; + u16 i; + + if (!rx_ring->rx_buffer_info) + return; + +#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT + if (rx_ring->skb) + dev_kfree_skb(rx_ring->skb); + rx_ring->skb = NULL; + +#endif + /* Free all the Rx ring sk_buffs */ + for (i = 0; i < rx_ring->count; i++) { + struct igb_rx_buffer *buffer_info = &rx_ring->rx_buffer_info[i]; +#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT + if (buffer_info->dma) { + dma_unmap_single(rx_ring->dev, + buffer_info->dma, + rx_ring->rx_buffer_len, + DMA_FROM_DEVICE); + buffer_info->dma = 0; + } + + if (buffer_info->skb) { + dev_kfree_skb(buffer_info->skb); + buffer_info->skb = NULL; + } +#else + if (!buffer_info->page) + continue; + + dma_unmap_page(rx_ring->dev, + buffer_info->dma, + PAGE_SIZE, + DMA_FROM_DEVICE); + __free_page(buffer_info->page); + + buffer_info->page = NULL; +#endif + } + + size = sizeof(struct igb_rx_buffer) * rx_ring->count; + memset(rx_ring->rx_buffer_info, 0, size); + + /* Zero out the descriptor ring */ + memset(rx_ring->desc, 0, rx_ring->size); + + rx_ring->next_to_alloc = 0; + rx_ring->next_to_clean = 0; + rx_ring->next_to_use = 0; +} + +/** + * igb_clean_all_rx_rings - Free Rx Buffers for all queues + * @adapter: board private structure + **/ +static void igb_clean_all_rx_rings(struct igb_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_rx_queues; i++) + igb_clean_rx_ring(adapter->rx_ring[i]); +} + +/** + * igb_set_mac - Change the Ethernet Address of the NIC + * @netdev: network interface device structure + * @p: pointer to an address structure + * + * Returns 0 on success, negative on failure + **/ +static int igb_set_mac(struct net_device *netdev, void *p) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + struct sockaddr *addr = p; + + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + + igb_del_mac_filter(adapter, hw->mac.addr, + adapter->vfs_allocated_count); + memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); + memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len); + + /* set the correct pool for the new PF MAC address in entry 0 */ + return igb_add_mac_filter(adapter, hw->mac.addr, + adapter->vfs_allocated_count); +} + +/** + * igb_write_mc_addr_list - write multicast addresses to MTA + * @netdev: network interface device structure + * + * Writes multicast address list to the MTA hash table. + * Returns: -ENOMEM on failure + * 0 on no addresses written + * X on writing X addresses to MTA + **/ +int igb_write_mc_addr_list(struct net_device *netdev) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; +#ifdef NETDEV_HW_ADDR_T_MULTICAST + struct netdev_hw_addr *ha; +#else + struct dev_mc_list *ha; +#endif + u8 *mta_list; + int i, count; +#ifdef CONFIG_IGB_VMDQ_NETDEV + int vm; +#endif + count = netdev_mc_count(netdev); +#ifdef CONFIG_IGB_VMDQ_NETDEV + for (vm = 1; vm < adapter->vmdq_pools; vm++) { + if (!adapter->vmdq_netdev[vm]) + break; + if (!netif_running(adapter->vmdq_netdev[vm])) + continue; + count += netdev_mc_count(adapter->vmdq_netdev[vm]); + } +#endif + + if (!count) { + e1000_update_mc_addr_list(hw, NULL, 0); + return 0; + } + mta_list = kzalloc(count * 6, GFP_ATOMIC); + if (!mta_list) + return -ENOMEM; + + /* The shared function expects a packed array of only addresses. */ + i = 0; + netdev_for_each_mc_addr(ha, netdev) +#ifdef NETDEV_HW_ADDR_T_MULTICAST + memcpy(mta_list + (i++ * ETH_ALEN), ha->addr, ETH_ALEN); +#else + memcpy(mta_list + (i++ * ETH_ALEN), ha->dmi_addr, ETH_ALEN); +#endif +#ifdef CONFIG_IGB_VMDQ_NETDEV + for (vm = 1; vm < adapter->vmdq_pools; vm++) { + if (!adapter->vmdq_netdev[vm]) + break; + if (!netif_running(adapter->vmdq_netdev[vm]) || + !netdev_mc_count(adapter->vmdq_netdev[vm])) + continue; + netdev_for_each_mc_addr(ha, adapter->vmdq_netdev[vm]) +#ifdef NETDEV_HW_ADDR_T_MULTICAST + memcpy(mta_list + (i++ * ETH_ALEN), + ha->addr, ETH_ALEN); +#else + memcpy(mta_list + (i++ * ETH_ALEN), + ha->dmi_addr, ETH_ALEN); +#endif + } +#endif + e1000_update_mc_addr_list(hw, mta_list, i); + kfree(mta_list); + + return count; +} + +void igb_rar_set(struct igb_adapter *adapter, u32 index) +{ + u32 rar_low, rar_high; + struct e1000_hw *hw = &adapter->hw; + u8 *addr = adapter->mac_table[index].addr; + /* HW expects these in little endian so we reverse the byte order + * from network order (big endian) to little endian + */ + rar_low = ((u32) addr[0] | ((u32) addr[1] << 8) | + ((u32) addr[2] << 16) | ((u32) addr[3] << 24)); + rar_high = ((u32) addr[4] | ((u32) addr[5] << 8)); + + /* Indicate to hardware the Address is Valid. */ + if (adapter->mac_table[index].state & IGB_MAC_STATE_IN_USE) + rar_high |= E1000_RAH_AV; + + if (hw->mac.type == e1000_82575) + rar_high |= E1000_RAH_POOL_1 * adapter->mac_table[index].queue; + else + rar_high |= E1000_RAH_POOL_1 << adapter->mac_table[index].queue; + + E1000_WRITE_REG(hw, E1000_RAL(index), rar_low); + E1000_WRITE_FLUSH(hw); + E1000_WRITE_REG(hw, E1000_RAH(index), rar_high); + E1000_WRITE_FLUSH(hw); +} + +void igb_full_sync_mac_table(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + int i; + for (i = 0; i < hw->mac.rar_entry_count; i++) { + igb_rar_set(adapter, i); + } +} + +void igb_sync_mac_table(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + int i; + for (i = 0; i < hw->mac.rar_entry_count; i++) { + if (adapter->mac_table[i].state & IGB_MAC_STATE_MODIFIED) + igb_rar_set(adapter, i); + adapter->mac_table[i].state &= ~(IGB_MAC_STATE_MODIFIED); + } +} + +int igb_available_rars(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + int i, count = 0; + + for (i = 0; i < hw->mac.rar_entry_count; i++) { + if (adapter->mac_table[i].state == 0) + count++; + } + return count; +} + +#ifdef HAVE_SET_RX_MODE +/** + * igb_write_uc_addr_list - write unicast addresses to RAR table + * @netdev: network interface device structure + * + * Writes unicast address list to the RAR table. + * Returns: -ENOMEM on failure/insufficient address space + * 0 on no addresses written + * X on writing X addresses to the RAR table + **/ +static int igb_write_uc_addr_list(struct net_device *netdev) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + unsigned int vfn = adapter->vfs_allocated_count; + int count = 0; + + /* return ENOMEM indicating insufficient memory for addresses */ + if (netdev_uc_count(netdev) > igb_available_rars(adapter)) + return -ENOMEM; + if (!netdev_uc_empty(netdev)) { +#ifdef NETDEV_HW_ADDR_T_UNICAST + struct netdev_hw_addr *ha; +#else + struct dev_mc_list *ha; +#endif + netdev_for_each_uc_addr(ha, netdev) { +#ifdef NETDEV_HW_ADDR_T_UNICAST + igb_del_mac_filter(adapter, ha->addr, vfn); + igb_add_mac_filter(adapter, ha->addr, vfn); +#else + igb_del_mac_filter(adapter, ha->da_addr, vfn); + igb_add_mac_filter(adapter, ha->da_addr, vfn); +#endif + count++; + } + } + return count; +} + +#endif /* HAVE_SET_RX_MODE */ +/** + * igb_set_rx_mode - Secondary Unicast, Multicast and Promiscuous mode set + * @netdev: network interface device structure + * + * The set_rx_mode entry point is called whenever the unicast or multicast + * address lists or the network interface flags are updated. This routine is + * responsible for configuring the hardware for proper unicast, multicast, + * promiscuous mode, and all-multi behavior. + **/ +static void igb_set_rx_mode(struct net_device *netdev) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + unsigned int vfn = adapter->vfs_allocated_count; + u32 rctl, vmolr = 0; + int count; + + /* Check for Promiscuous and All Multicast modes */ + rctl = E1000_READ_REG(hw, E1000_RCTL); + + /* clear the effected bits */ + rctl &= ~(E1000_RCTL_UPE | E1000_RCTL_MPE | E1000_RCTL_VFE); + + if (netdev->flags & IFF_PROMISC) { + rctl |= (E1000_RCTL_UPE | E1000_RCTL_MPE); + vmolr |= (E1000_VMOLR_ROPE | E1000_VMOLR_MPME); + /* retain VLAN HW filtering if in VT mode */ + if (adapter->vfs_allocated_count || adapter->vmdq_pools) + rctl |= E1000_RCTL_VFE; + } else { + if (netdev->flags & IFF_ALLMULTI) { + rctl |= E1000_RCTL_MPE; + vmolr |= E1000_VMOLR_MPME; + } else { + /* + * Write addresses to the MTA, if the attempt fails + * then we should just turn on promiscuous mode so + * that we can at least receive multicast traffic + */ + count = igb_write_mc_addr_list(netdev); + if (count < 0) { + rctl |= E1000_RCTL_MPE; + vmolr |= E1000_VMOLR_MPME; + } else if (count) { + vmolr |= E1000_VMOLR_ROMPE; + } + } +#ifdef HAVE_SET_RX_MODE + /* + * Write addresses to available RAR registers, if there is not + * sufficient space to store all the addresses then enable + * unicast promiscuous mode + */ + count = igb_write_uc_addr_list(netdev); + if (count < 0) { + rctl |= E1000_RCTL_UPE; + vmolr |= E1000_VMOLR_ROPE; + } +#endif /* HAVE_SET_RX_MODE */ + rctl |= E1000_RCTL_VFE; + } + E1000_WRITE_REG(hw, E1000_RCTL, rctl); + + /* + * In order to support SR-IOV and eventually VMDq it is necessary to set + * the VMOLR to enable the appropriate modes. Without this workaround + * we will have issues with VLAN tag stripping not being done for frames + * that are only arriving because we are the default pool + */ + if (hw->mac.type < e1000_82576) + return; + + vmolr |= E1000_READ_REG(hw, E1000_VMOLR(vfn)) & + ~(E1000_VMOLR_ROPE | E1000_VMOLR_MPME | E1000_VMOLR_ROMPE); + E1000_WRITE_REG(hw, E1000_VMOLR(vfn), vmolr); + igb_restore_vf_multicasts(adapter); +} + +static void igb_check_wvbr(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 wvbr = 0; + + switch (hw->mac.type) { + case e1000_82576: + case e1000_i350: + if (!(wvbr = E1000_READ_REG(hw, E1000_WVBR))) + return; + break; + default: + break; + } + + adapter->wvbr |= wvbr; +} + +#define IGB_STAGGERED_QUEUE_OFFSET 8 + +static void igb_spoof_check(struct igb_adapter *adapter) +{ + int j; + + if (!adapter->wvbr) + return; + + switch (adapter->hw.mac.type) { + case e1000_82576: + for (j = 0; j < adapter->vfs_allocated_count; j++) { + if (adapter->wvbr & (1 << j) || + adapter->wvbr & (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))) { + DPRINTK(DRV, WARNING, + "Spoof event(s) detected on VF %d\n", j); + adapter->wvbr &= + ~((1 << j) | + (1 << (j + IGB_STAGGERED_QUEUE_OFFSET))); + } + } + break; + case e1000_i350: + for (j = 0; j < adapter->vfs_allocated_count; j++) { + if (adapter->wvbr & (1 << j)) { + DPRINTK(DRV, WARNING, + "Spoof event(s) detected on VF %d\n", j); + adapter->wvbr &= ~(1 << j); + } + } + break; + default: + break; + } +} + +/* Need to wait a few seconds after link up to get diagnostic information from + * the phy */ +static void igb_update_phy_info(unsigned long data) +{ + struct igb_adapter *adapter = (struct igb_adapter *) data; + e1000_get_phy_info(&adapter->hw); +} + +/** + * igb_has_link - check shared code for link and determine up/down + * @adapter: pointer to driver private info + **/ +bool igb_has_link(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + bool link_active = FALSE; + + /* get_link_status is set on LSC (link status) interrupt or + * rx sequence error interrupt. get_link_status will stay + * false until the e1000_check_for_link establishes link + * for copper adapters ONLY + */ + switch (hw->phy.media_type) { + case e1000_media_type_copper: + if (!hw->mac.get_link_status) + return true; + case e1000_media_type_internal_serdes: + e1000_check_for_link(hw); + link_active = !hw->mac.get_link_status; + break; + case e1000_media_type_unknown: + default: + break; + } + + if (((hw->mac.type == e1000_i210) || + (hw->mac.type == e1000_i211)) && + (hw->phy.id == I210_I_PHY_ID)) { + if (!netif_carrier_ok(adapter->netdev)) { + adapter->flags &= ~IGB_FLAG_NEED_LINK_UPDATE; + } else if (!(adapter->flags & IGB_FLAG_NEED_LINK_UPDATE)) { + adapter->flags |= IGB_FLAG_NEED_LINK_UPDATE; + adapter->link_check_timeout = jiffies; + } + } + + return link_active; +} + +/** + * igb_watchdog - Timer Call-back + * @data: pointer to adapter cast into an unsigned long + **/ +static void igb_watchdog(unsigned long data) +{ + struct igb_adapter *adapter = (struct igb_adapter *)data; + /* Do the rest outside of interrupt context */ + schedule_work(&adapter->watchdog_task); +} + +static void igb_watchdog_task(struct work_struct *work) +{ + struct igb_adapter *adapter = container_of(work, + struct igb_adapter, + watchdog_task); + struct e1000_hw *hw = &adapter->hw; + struct net_device *netdev = adapter->netdev; + u32 link; + int i; + u32 thstat, ctrl_ext; + u32 connsw; + + link = igb_has_link(adapter); + /* Force link down if we have fiber to swap to */ + if (adapter->flags & IGB_FLAG_MAS_ENABLE) { + if (hw->phy.media_type == e1000_media_type_copper) { + connsw = E1000_READ_REG(hw, E1000_CONNSW); + if (!(connsw & E1000_CONNSW_AUTOSENSE_EN)) + link = 0; + } + } + + if (adapter->flags & IGB_FLAG_NEED_LINK_UPDATE) { + if (time_after(jiffies, (adapter->link_check_timeout + HZ))) + adapter->flags &= ~IGB_FLAG_NEED_LINK_UPDATE; + else + link = FALSE; + } + + if (link) { + /* Perform a reset if the media type changed. */ + if (hw->dev_spec._82575.media_changed) { + hw->dev_spec._82575.media_changed = false; + adapter->flags |= IGB_FLAG_MEDIA_RESET; + igb_reset(adapter); + } + + /* Cancel scheduled suspend requests. */ + pm_runtime_resume(netdev->dev.parent); + + if (!netif_carrier_ok(netdev)) { + u32 ctrl; + e1000_get_speed_and_duplex(hw, + &adapter->link_speed, + &adapter->link_duplex); + + ctrl = E1000_READ_REG(hw, E1000_CTRL); + /* Links status message must follow this format */ + printk(KERN_INFO "igb: %s NIC Link is Up %d Mbps %s, " + "Flow Control: %s\n", + netdev->name, + adapter->link_speed, + adapter->link_duplex == FULL_DUPLEX ? + "Full Duplex" : "Half Duplex", + ((ctrl & E1000_CTRL_TFCE) && + (ctrl & E1000_CTRL_RFCE)) ? "RX/TX": + ((ctrl & E1000_CTRL_RFCE) ? "RX" : + ((ctrl & E1000_CTRL_TFCE) ? "TX" : "None"))); + /* adjust timeout factor according to speed/duplex */ + adapter->tx_timeout_factor = 1; + switch (adapter->link_speed) { + case SPEED_10: + adapter->tx_timeout_factor = 14; + break; + case SPEED_100: + /* maybe add some timeout factor ? */ + break; + default: + break; + } + + netif_carrier_on(netdev); + netif_tx_wake_all_queues(netdev); + + igb_ping_all_vfs(adapter); +#ifdef IFLA_VF_MAX + igb_check_vf_rate_limit(adapter); +#endif /* IFLA_VF_MAX */ + + /* link state has changed, schedule phy info update */ + if (!test_bit(__IGB_DOWN, &adapter->state)) + mod_timer(&adapter->phy_info_timer, + round_jiffies(jiffies + 2 * HZ)); + } + } else { + if (netif_carrier_ok(netdev)) { + adapter->link_speed = 0; + adapter->link_duplex = 0; + /* check for thermal sensor event on i350 */ + if (hw->mac.type == e1000_i350) { + thstat = E1000_READ_REG(hw, E1000_THSTAT); + ctrl_ext = E1000_READ_REG(hw, E1000_CTRL_EXT); + if ((hw->phy.media_type == + e1000_media_type_copper) && + !(ctrl_ext & + E1000_CTRL_EXT_LINK_MODE_SGMII)) { + if (thstat & E1000_THSTAT_PWR_DOWN) { + printk(KERN_ERR "igb: %s The " + "network adapter was stopped " + "because it overheated.\n", + netdev->name); + } + if (thstat & E1000_THSTAT_LINK_THROTTLE) { + printk(KERN_INFO + "igb: %s The network " + "adapter supported " + "link speed " + "was downshifted " + "because it " + "overheated.\n", + netdev->name); + } + } + } + + /* Links status message must follow this format */ + printk(KERN_INFO "igb: %s NIC Link is Down\n", + netdev->name); + netif_carrier_off(netdev); + netif_tx_stop_all_queues(netdev); + + igb_ping_all_vfs(adapter); + + /* link state has changed, schedule phy info update */ + if (!test_bit(__IGB_DOWN, &adapter->state)) + mod_timer(&adapter->phy_info_timer, + round_jiffies(jiffies + 2 * HZ)); + /* link is down, time to check for alternate media */ + if (adapter->flags & IGB_FLAG_MAS_ENABLE) { + igb_check_swap_media(adapter); + if (adapter->flags & IGB_FLAG_MEDIA_RESET) { + schedule_work(&adapter->reset_task); + /* return immediately */ + return; + } + } + pm_schedule_suspend(netdev->dev.parent, + MSEC_PER_SEC * 5); + + /* also check for alternate media here */ + } else if (!netif_carrier_ok(netdev) && + (adapter->flags & IGB_FLAG_MAS_ENABLE)) { + hw->mac.ops.power_up_serdes(hw); + igb_check_swap_media(adapter); + if (adapter->flags & IGB_FLAG_MEDIA_RESET) { + schedule_work(&adapter->reset_task); + /* return immediately */ + return; + } + } + } + + igb_update_stats(adapter); + + for (i = 0; i < adapter->num_tx_queues; i++) { + struct igb_ring *tx_ring = adapter->tx_ring[i]; + if (!netif_carrier_ok(netdev)) { + /* We've lost link, so the controller stops DMA, + * but we've got queued Tx work that's never going + * to get done, so reset controller to flush Tx. + * (Do the reset outside of interrupt context). */ + if (igb_desc_unused(tx_ring) + 1 < tx_ring->count) { + adapter->tx_timeout_count++; + schedule_work(&adapter->reset_task); + /* return immediately since reset is imminent */ + return; + } + } + + /* Force detection of hung controller every watchdog period */ + set_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags); + } + + /* Cause software interrupt to ensure rx ring is cleaned */ + if (adapter->msix_entries) { + u32 eics = 0; + for (i = 0; i < adapter->num_q_vectors; i++) + eics |= adapter->q_vector[i]->eims_value; + E1000_WRITE_REG(hw, E1000_EICS, eics); + } else { + E1000_WRITE_REG(hw, E1000_ICS, E1000_ICS_RXDMT0); + } + + igb_spoof_check(adapter); + + /* Reset the timer */ + if (!test_bit(__IGB_DOWN, &adapter->state)) { + if (adapter->flags & IGB_FLAG_NEED_LINK_UPDATE) + mod_timer(&adapter->watchdog_timer, + round_jiffies(jiffies + HZ)); + else + mod_timer(&adapter->watchdog_timer, + round_jiffies(jiffies + 2 * HZ)); + } +} + +static void igb_dma_err_task(struct work_struct *work) +{ + struct igb_adapter *adapter = container_of(work, + struct igb_adapter, + dma_err_task); + int vf; + struct e1000_hw *hw = &adapter->hw; + struct net_device *netdev = adapter->netdev; + u32 hgptc; + u32 ciaa, ciad; + + hgptc = E1000_READ_REG(hw, E1000_HGPTC); + if (hgptc) /* If incrementing then no need for the check below */ + goto dma_timer_reset; + /* + * Check to see if a bad DMA write target from an errant or + * malicious VF has caused a PCIe error. If so then we can + * issue a VFLR to the offending VF(s) and then resume without + * requesting a full slot reset. + */ + + for (vf = 0; vf < adapter->vfs_allocated_count; vf++) { + ciaa = (vf << 16) | 0x80000000; + /* 32 bit read so align, we really want status at offset 6 */ + ciaa |= PCI_COMMAND; + E1000_WRITE_REG(hw, E1000_CIAA, ciaa); + ciad = E1000_READ_REG(hw, E1000_CIAD); + ciaa &= 0x7FFFFFFF; + /* disable debug mode asap after reading data */ + E1000_WRITE_REG(hw, E1000_CIAA, ciaa); + /* Get the upper 16 bits which will be the PCI status reg */ + ciad >>= 16; + if (ciad & (PCI_STATUS_REC_MASTER_ABORT | + PCI_STATUS_REC_TARGET_ABORT | + PCI_STATUS_SIG_SYSTEM_ERROR)) { + netdev_err(netdev, "VF %d suffered error\n", vf); + /* Issue VFLR */ + ciaa = (vf << 16) | 0x80000000; + ciaa |= 0xA8; + E1000_WRITE_REG(hw, E1000_CIAA, ciaa); + ciad = 0x00008000; /* VFLR */ + E1000_WRITE_REG(hw, E1000_CIAD, ciad); + ciaa &= 0x7FFFFFFF; + E1000_WRITE_REG(hw, E1000_CIAA, ciaa); + } + } +dma_timer_reset: + /* Reset the timer */ + if (!test_bit(__IGB_DOWN, &adapter->state)) + mod_timer(&adapter->dma_err_timer, + round_jiffies(jiffies + HZ / 10)); +} + +/** + * igb_dma_err_timer - Timer Call-back + * @data: pointer to adapter cast into an unsigned long + **/ +static void igb_dma_err_timer(unsigned long data) +{ + struct igb_adapter *adapter = (struct igb_adapter *)data; + /* Do the rest outside of interrupt context */ + schedule_work(&adapter->dma_err_task); +} + +enum latency_range { + lowest_latency = 0, + low_latency = 1, + bulk_latency = 2, + latency_invalid = 255 +}; + +/** + * igb_update_ring_itr - update the dynamic ITR value based on packet size + * + * Stores a new ITR value based on strictly on packet size. This + * algorithm is less sophisticated than that used in igb_update_itr, + * due to the difficulty of synchronizing statistics across multiple + * receive rings. The divisors and thresholds used by this function + * were determined based on theoretical maximum wire speed and testing + * data, in order to minimize response time while increasing bulk + * throughput. + * This functionality is controlled by the InterruptThrottleRate module + * parameter (see igb_param.c) + * NOTE: This function is called only when operating in a multiqueue + * receive environment. + * @q_vector: pointer to q_vector + **/ +static void igb_update_ring_itr(struct igb_q_vector *q_vector) +{ + int new_val = q_vector->itr_val; + int avg_wire_size = 0; + struct igb_adapter *adapter = q_vector->adapter; + unsigned int packets; + + /* For non-gigabit speeds, just fix the interrupt rate at 4000 + * ints/sec - ITR timer value of 120 ticks. + */ + switch (adapter->link_speed) { + case SPEED_10: + case SPEED_100: + new_val = IGB_4K_ITR; + goto set_itr_val; + default: + break; + } + + packets = q_vector->rx.total_packets; + if (packets) + avg_wire_size = q_vector->rx.total_bytes / packets; + + packets = q_vector->tx.total_packets; + if (packets) + avg_wire_size = max_t(u32, avg_wire_size, + q_vector->tx.total_bytes / packets); + + /* if avg_wire_size isn't set no work was done */ + if (!avg_wire_size) + goto clear_counts; + + /* Add 24 bytes to size to account for CRC, preamble, and gap */ + avg_wire_size += 24; + + /* Don't starve jumbo frames */ + avg_wire_size = min(avg_wire_size, 3000); + + /* Give a little boost to mid-size frames */ + if ((avg_wire_size > 300) && (avg_wire_size < 1200)) + new_val = avg_wire_size / 3; + else + new_val = avg_wire_size / 2; + + /* conservative mode (itr 3) eliminates the lowest_latency setting */ + if (new_val < IGB_20K_ITR && + ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || + (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) + new_val = IGB_20K_ITR; + +set_itr_val: + if (new_val != q_vector->itr_val) { + q_vector->itr_val = new_val; + q_vector->set_itr = 1; + } +clear_counts: + q_vector->rx.total_bytes = 0; + q_vector->rx.total_packets = 0; + q_vector->tx.total_bytes = 0; + q_vector->tx.total_packets = 0; +} + +/** + * igb_update_itr - update the dynamic ITR value based on statistics + * Stores a new ITR value based on packets and byte + * counts during the last interrupt. The advantage of per interrupt + * computation is faster updates and more accurate ITR for the current + * traffic pattern. Constants in this function were computed + * based on theoretical maximum wire speed and thresholds were set based + * on testing data as well as attempting to minimize response time + * while increasing bulk throughput. + * this functionality is controlled by the InterruptThrottleRate module + * parameter (see igb_param.c) + * NOTE: These calculations are only valid when operating in a single- + * queue environment. + * @q_vector: pointer to q_vector + * @ring_container: ring info to update the itr for + **/ +static void igb_update_itr(struct igb_q_vector *q_vector, + struct igb_ring_container *ring_container) +{ + unsigned int packets = ring_container->total_packets; + unsigned int bytes = ring_container->total_bytes; + u8 itrval = ring_container->itr; + + /* no packets, exit with status unchanged */ + if (packets == 0) + return; + + switch (itrval) { + case lowest_latency: + /* handle TSO and jumbo frames */ + if (bytes/packets > 8000) + itrval = bulk_latency; + else if ((packets < 5) && (bytes > 512)) + itrval = low_latency; + break; + case low_latency: /* 50 usec aka 20000 ints/s */ + if (bytes > 10000) { + /* this if handles the TSO accounting */ + if (bytes/packets > 8000) { + itrval = bulk_latency; + } else if ((packets < 10) || ((bytes/packets) > 1200)) { + itrval = bulk_latency; + } else if ((packets > 35)) { + itrval = lowest_latency; + } + } else if (bytes/packets > 2000) { + itrval = bulk_latency; + } else if (packets <= 2 && bytes < 512) { + itrval = lowest_latency; + } + break; + case bulk_latency: /* 250 usec aka 4000 ints/s */ + if (bytes > 25000) { + if (packets > 35) + itrval = low_latency; + } else if (bytes < 1500) { + itrval = low_latency; + } + break; + } + + /* clear work counters since we have the values we need */ + ring_container->total_bytes = 0; + ring_container->total_packets = 0; + + /* write updated itr to ring container */ + ring_container->itr = itrval; +} + +static void igb_set_itr(struct igb_q_vector *q_vector) +{ + struct igb_adapter *adapter = q_vector->adapter; + u32 new_itr = q_vector->itr_val; + u8 current_itr = 0; + + /* for non-gigabit speeds, just fix the interrupt rate at 4000 */ + switch (adapter->link_speed) { + case SPEED_10: + case SPEED_100: + current_itr = 0; + new_itr = IGB_4K_ITR; + goto set_itr_now; + default: + break; + } + + igb_update_itr(q_vector, &q_vector->tx); + igb_update_itr(q_vector, &q_vector->rx); + + current_itr = max(q_vector->rx.itr, q_vector->tx.itr); + + /* conservative mode (itr 3) eliminates the lowest_latency setting */ + if (current_itr == lowest_latency && + ((q_vector->rx.ring && adapter->rx_itr_setting == 3) || + (!q_vector->rx.ring && adapter->tx_itr_setting == 3))) + current_itr = low_latency; + + switch (current_itr) { + /* counts and packets in update_itr are dependent on these numbers */ + case lowest_latency: + new_itr = IGB_70K_ITR; /* 70,000 ints/sec */ + break; + case low_latency: + new_itr = IGB_20K_ITR; /* 20,000 ints/sec */ + break; + case bulk_latency: + new_itr = IGB_4K_ITR; /* 4,000 ints/sec */ + break; + default: + break; + } + +set_itr_now: + if (new_itr != q_vector->itr_val) { + /* this attempts to bias the interrupt rate towards Bulk + * by adding intermediate steps when interrupt rate is + * increasing */ + new_itr = new_itr > q_vector->itr_val ? + max((new_itr * q_vector->itr_val) / + (new_itr + (q_vector->itr_val >> 2)), + new_itr) : + new_itr; + /* Don't write the value here; it resets the adapter's + * internal timer, and causes us to delay far longer than + * we should between interrupts. Instead, we write the ITR + * value at the beginning of the next interrupt so the timing + * ends up being correct. + */ + q_vector->itr_val = new_itr; + q_vector->set_itr = 1; + } +} + +void igb_tx_ctxtdesc(struct igb_ring *tx_ring, u32 vlan_macip_lens, + u32 type_tucmd, u32 mss_l4len_idx) +{ + struct e1000_adv_tx_context_desc *context_desc; + u16 i = tx_ring->next_to_use; + + context_desc = IGB_TX_CTXTDESC(tx_ring, i); + + i++; + tx_ring->next_to_use = (i < tx_ring->count) ? i : 0; + + /* set bits to identify this as an advanced context descriptor */ + type_tucmd |= E1000_TXD_CMD_DEXT | E1000_ADVTXD_DTYP_CTXT; + + /* For 82575, context index must be unique per ring. */ + if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags)) + mss_l4len_idx |= tx_ring->reg_idx << 4; + + context_desc->vlan_macip_lens = cpu_to_le32(vlan_macip_lens); + context_desc->seqnum_seed = 0; + context_desc->type_tucmd_mlhl = cpu_to_le32(type_tucmd); + context_desc->mss_l4len_idx = cpu_to_le32(mss_l4len_idx); +} + +static int igb_tso(struct igb_ring *tx_ring, + struct igb_tx_buffer *first, + u8 *hdr_len) +{ +#ifdef NETIF_F_TSO + struct sk_buff *skb = first->skb; + u32 vlan_macip_lens, type_tucmd; + u32 mss_l4len_idx, l4len; + + if (skb->ip_summed != CHECKSUM_PARTIAL) + return 0; + + if (!skb_is_gso(skb)) +#endif /* NETIF_F_TSO */ + return 0; +#ifdef NETIF_F_TSO + + if (skb_header_cloned(skb)) { + int err = pskb_expand_head(skb, 0, 0, GFP_ATOMIC); + if (err) + return err; + } + + /* ADV DTYP TUCMD MKRLOC/ISCSIHEDLEN */ + type_tucmd = E1000_ADVTXD_TUCMD_L4T_TCP; + + if (first->protocol == __constant_htons(ETH_P_IP)) { + struct iphdr *iph = ip_hdr(skb); + iph->tot_len = 0; + iph->check = 0; + tcp_hdr(skb)->check = ~csum_tcpudp_magic(iph->saddr, + iph->daddr, 0, + IPPROTO_TCP, + 0); + type_tucmd |= E1000_ADVTXD_TUCMD_IPV4; + first->tx_flags |= IGB_TX_FLAGS_TSO | + IGB_TX_FLAGS_CSUM | + IGB_TX_FLAGS_IPV4; +#ifdef NETIF_F_TSO6 + } else if (skb_is_gso_v6(skb)) { + ipv6_hdr(skb)->payload_len = 0; + tcp_hdr(skb)->check = ~csum_ipv6_magic(&ipv6_hdr(skb)->saddr, + &ipv6_hdr(skb)->daddr, + 0, IPPROTO_TCP, 0); + first->tx_flags |= IGB_TX_FLAGS_TSO | + IGB_TX_FLAGS_CSUM; +#endif + } + + /* compute header lengths */ + l4len = tcp_hdrlen(skb); + *hdr_len = skb_transport_offset(skb) + l4len; + + /* update gso size and bytecount with header size */ + first->gso_segs = skb_shinfo(skb)->gso_segs; + first->bytecount += (first->gso_segs - 1) * *hdr_len; + + /* MSS L4LEN IDX */ + mss_l4len_idx = l4len << E1000_ADVTXD_L4LEN_SHIFT; + mss_l4len_idx |= skb_shinfo(skb)->gso_size << E1000_ADVTXD_MSS_SHIFT; + + /* VLAN MACLEN IPLEN */ + vlan_macip_lens = skb_network_header_len(skb); + vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT; + vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK; + + igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx); + + return 1; +#endif /* NETIF_F_TSO */ +} + +static void igb_tx_csum(struct igb_ring *tx_ring, struct igb_tx_buffer *first) +{ + struct sk_buff *skb = first->skb; + u32 vlan_macip_lens = 0; + u32 mss_l4len_idx = 0; + u32 type_tucmd = 0; + + if (skb->ip_summed != CHECKSUM_PARTIAL) { + if (!(first->tx_flags & IGB_TX_FLAGS_VLAN)) + return; + } else { + u8 nexthdr = 0; + switch (first->protocol) { + case __constant_htons(ETH_P_IP): + vlan_macip_lens |= skb_network_header_len(skb); + type_tucmd |= E1000_ADVTXD_TUCMD_IPV4; + nexthdr = ip_hdr(skb)->protocol; + break; +#ifdef NETIF_F_IPV6_CSUM + case __constant_htons(ETH_P_IPV6): + vlan_macip_lens |= skb_network_header_len(skb); + nexthdr = ipv6_hdr(skb)->nexthdr; + break; +#endif + default: + if (unlikely(net_ratelimit())) { + dev_warn(tx_ring->dev, + "partial checksum but proto=%x!\n", + first->protocol); + } + break; + } + + switch (nexthdr) { + case IPPROTO_TCP: + type_tucmd |= E1000_ADVTXD_TUCMD_L4T_TCP; + mss_l4len_idx = tcp_hdrlen(skb) << + E1000_ADVTXD_L4LEN_SHIFT; + break; +#ifdef HAVE_SCTP + case IPPROTO_SCTP: + type_tucmd |= E1000_ADVTXD_TUCMD_L4T_SCTP; + mss_l4len_idx = sizeof(struct sctphdr) << + E1000_ADVTXD_L4LEN_SHIFT; + break; +#endif + case IPPROTO_UDP: + mss_l4len_idx = sizeof(struct udphdr) << + E1000_ADVTXD_L4LEN_SHIFT; + break; + default: + if (unlikely(net_ratelimit())) { + dev_warn(tx_ring->dev, + "partial checksum but l4 proto=%x!\n", + nexthdr); + } + break; + } + + /* update TX checksum flag */ + first->tx_flags |= IGB_TX_FLAGS_CSUM; + } + + vlan_macip_lens |= skb_network_offset(skb) << E1000_ADVTXD_MACLEN_SHIFT; + vlan_macip_lens |= first->tx_flags & IGB_TX_FLAGS_VLAN_MASK; + + igb_tx_ctxtdesc(tx_ring, vlan_macip_lens, type_tucmd, mss_l4len_idx); +} + +#define IGB_SET_FLAG(_input, _flag, _result) \ + ((_flag <= _result) ? \ + ((u32)(_input & _flag) * (_result / _flag)) : \ + ((u32)(_input & _flag) / (_flag / _result))) + +static u32 igb_tx_cmd_type(struct sk_buff *skb, u32 tx_flags) +{ + /* set type for advanced descriptor with frame checksum insertion */ + u32 cmd_type = E1000_ADVTXD_DTYP_DATA | + E1000_ADVTXD_DCMD_DEXT | + E1000_ADVTXD_DCMD_IFCS; + + /* set HW vlan bit if vlan is present */ + cmd_type |= IGB_SET_FLAG(tx_flags, IGB_TX_FLAGS_VLAN, + (E1000_ADVTXD_DCMD_VLE)); + + /* set segmentation bits for TSO */ + cmd_type |= IGB_SET_FLAG(tx_flags, IGB_TX_FLAGS_TSO, + (E1000_ADVTXD_DCMD_TSE)); + + /* set timestamp bit if present */ + cmd_type |= IGB_SET_FLAG(tx_flags, IGB_TX_FLAGS_TSTAMP, + (E1000_ADVTXD_MAC_TSTAMP)); + + return cmd_type; +} + +static void igb_tx_olinfo_status(struct igb_ring *tx_ring, + union e1000_adv_tx_desc *tx_desc, + u32 tx_flags, unsigned int paylen) +{ + u32 olinfo_status = paylen << E1000_ADVTXD_PAYLEN_SHIFT; + + /* 82575 requires a unique index per ring */ + if (test_bit(IGB_RING_FLAG_TX_CTX_IDX, &tx_ring->flags)) + olinfo_status |= tx_ring->reg_idx << 4; + + /* insert L4 checksum */ + olinfo_status |= IGB_SET_FLAG(tx_flags, + IGB_TX_FLAGS_CSUM, + (E1000_TXD_POPTS_TXSM << 8)); + + /* insert IPv4 checksum */ + olinfo_status |= IGB_SET_FLAG(tx_flags, + IGB_TX_FLAGS_IPV4, + (E1000_TXD_POPTS_IXSM << 8)); + + tx_desc->read.olinfo_status = cpu_to_le32(olinfo_status); +} + +static void igb_tx_map(struct igb_ring *tx_ring, + struct igb_tx_buffer *first, + const u8 hdr_len) +{ + struct sk_buff *skb = first->skb; + struct igb_tx_buffer *tx_buffer; + union e1000_adv_tx_desc *tx_desc; + struct skb_frag_struct *frag; + dma_addr_t dma; + unsigned int data_len, size; + u32 tx_flags = first->tx_flags; + u32 cmd_type = igb_tx_cmd_type(skb, tx_flags); + u16 i = tx_ring->next_to_use; + + tx_desc = IGB_TX_DESC(tx_ring, i); + + igb_tx_olinfo_status(tx_ring, tx_desc, tx_flags, skb->len - hdr_len); + + size = skb_headlen(skb); + data_len = skb->data_len; + + dma = dma_map_single(tx_ring->dev, skb->data, size, DMA_TO_DEVICE); + + tx_buffer = first; + + for (frag = &skb_shinfo(skb)->frags[0];; frag++) { + if (dma_mapping_error(tx_ring->dev, dma)) + goto dma_error; + + /* record length, and DMA address */ + dma_unmap_len_set(tx_buffer, len, size); + dma_unmap_addr_set(tx_buffer, dma, dma); + + tx_desc->read.buffer_addr = cpu_to_le64(dma); + + while (unlikely(size > IGB_MAX_DATA_PER_TXD)) { + tx_desc->read.cmd_type_len = + cpu_to_le32(cmd_type ^ IGB_MAX_DATA_PER_TXD); + + i++; + tx_desc++; + if (i == tx_ring->count) { + tx_desc = IGB_TX_DESC(tx_ring, 0); + i = 0; + } + tx_desc->read.olinfo_status = 0; + + dma += IGB_MAX_DATA_PER_TXD; + size -= IGB_MAX_DATA_PER_TXD; + + tx_desc->read.buffer_addr = cpu_to_le64(dma); + } + + if (likely(!data_len)) + break; + + tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type ^ size); + + i++; + tx_desc++; + if (i == tx_ring->count) { + tx_desc = IGB_TX_DESC(tx_ring, 0); + i = 0; + } + tx_desc->read.olinfo_status = 0; + + size = skb_frag_size(frag); + data_len -= size; + + dma = skb_frag_dma_map(tx_ring->dev, frag, 0, + size, DMA_TO_DEVICE); + + tx_buffer = &tx_ring->tx_buffer_info[i]; + } + + /* write last descriptor with RS and EOP bits */ + cmd_type |= size | IGB_TXD_DCMD; + tx_desc->read.cmd_type_len = cpu_to_le32(cmd_type); + + netdev_tx_sent_queue(txring_txq(tx_ring), first->bytecount); + /* set the timestamp */ + first->time_stamp = jiffies; + + /* + * Force memory writes to complete before letting h/w know there + * are new descriptors to fetch. (Only applicable for weak-ordered + * memory model archs, such as IA-64). + * + * We also need this memory barrier to make certain all of the + * status bits have been updated before next_to_watch is written. + */ + wmb(); + + /* set next_to_watch value indicating a packet is present */ + first->next_to_watch = tx_desc; + + i++; + if (i == tx_ring->count) + i = 0; + + tx_ring->next_to_use = i; + + writel(i, tx_ring->tail); + + /* we need this if more than one processor can write to our tail + * at a time, it syncronizes IO on IA64/Altix systems */ + mmiowb(); + + return; + +dma_error: + dev_err(tx_ring->dev, "TX DMA map failed\n"); + + /* clear dma mappings for failed tx_buffer_info map */ + for (;;) { + tx_buffer = &tx_ring->tx_buffer_info[i]; + igb_unmap_and_free_tx_resource(tx_ring, tx_buffer); + if (tx_buffer == first) + break; + if (i == 0) + i = tx_ring->count; + i--; + } + + tx_ring->next_to_use = i; +} + +static int __igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size) +{ + struct net_device *netdev = netdev_ring(tx_ring); + + if (netif_is_multiqueue(netdev)) + netif_stop_subqueue(netdev, ring_queue_index(tx_ring)); + else + netif_stop_queue(netdev); + + /* Herbert's original patch had: + * smp_mb__after_netif_stop_queue(); + * but since that doesn't exist yet, just open code it. */ + smp_mb(); + + /* We need to check again in a case another CPU has just + * made room available. */ + if (igb_desc_unused(tx_ring) < size) + return -EBUSY; + + /* A reprieve! */ + if (netif_is_multiqueue(netdev)) + netif_wake_subqueue(netdev, ring_queue_index(tx_ring)); + else + netif_wake_queue(netdev); + + tx_ring->tx_stats.restart_queue++; + + return 0; +} + +static inline int igb_maybe_stop_tx(struct igb_ring *tx_ring, const u16 size) +{ + if (igb_desc_unused(tx_ring) >= size) + return 0; + return __igb_maybe_stop_tx(tx_ring, size); +} + +netdev_tx_t igb_xmit_frame_ring(struct sk_buff *skb, + struct igb_ring *tx_ring) +{ + struct igb_tx_buffer *first; + int tso; + u32 tx_flags = 0; +#if PAGE_SIZE > IGB_MAX_DATA_PER_TXD + unsigned short f; +#endif + u16 count = TXD_USE_COUNT(skb_headlen(skb)); + __be16 protocol = vlan_get_protocol(skb); + u8 hdr_len = 0; + + /* + * need: 1 descriptor per page * PAGE_SIZE/IGB_MAX_DATA_PER_TXD, + * + 1 desc for skb_headlen/IGB_MAX_DATA_PER_TXD, + * + 2 desc gap to keep tail from touching head, + * + 1 desc for context descriptor, + * otherwise try next time + */ +#if PAGE_SIZE > IGB_MAX_DATA_PER_TXD + for (f = 0; f < skb_shinfo(skb)->nr_frags; f++) + count += TXD_USE_COUNT(skb_shinfo(skb)->frags[f].size); +#else + count += skb_shinfo(skb)->nr_frags; +#endif + if (igb_maybe_stop_tx(tx_ring, count + 3)) { + /* this is a hard error */ + return NETDEV_TX_BUSY; + } + + /* record the location of the first descriptor for this packet */ + first = &tx_ring->tx_buffer_info[tx_ring->next_to_use]; + first->skb = skb; + first->bytecount = skb->len; + first->gso_segs = 1; + + skb_tx_timestamp(skb); + +#ifdef HAVE_PTP_1588_CLOCK + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { + struct igb_adapter *adapter = netdev_priv(tx_ring->netdev); + if (!adapter->ptp_tx_skb) { + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + tx_flags |= IGB_TX_FLAGS_TSTAMP; + + adapter->ptp_tx_skb = skb_get(skb); + adapter->ptp_tx_start = jiffies; + if (adapter->hw.mac.type == e1000_82576) + schedule_work(&adapter->ptp_tx_work); + } + } +#endif /* HAVE_PTP_1588_CLOCK */ + + if (vlan_tx_tag_present(skb)) { + tx_flags |= IGB_TX_FLAGS_VLAN; + tx_flags |= (vlan_tx_tag_get(skb) << IGB_TX_FLAGS_VLAN_SHIFT); + } + + /* record initial flags and protocol */ + first->tx_flags = tx_flags; + first->protocol = protocol; + + tso = igb_tso(tx_ring, first, &hdr_len); + if (tso < 0) + goto out_drop; + else if (!tso) + igb_tx_csum(tx_ring, first); + + igb_tx_map(tx_ring, first, hdr_len); + +#ifndef HAVE_TRANS_START_IN_QUEUE + netdev_ring(tx_ring)->trans_start = jiffies; + +#endif + /* Make sure there is space in the ring for the next send. */ + igb_maybe_stop_tx(tx_ring, DESC_NEEDED); + + return NETDEV_TX_OK; + +out_drop: + igb_unmap_and_free_tx_resource(tx_ring, first); + + return NETDEV_TX_OK; +} + +#ifdef HAVE_TX_MQ +static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter, + struct sk_buff *skb) +{ + unsigned int r_idx = skb->queue_mapping; + + if (r_idx >= adapter->num_tx_queues) + r_idx = r_idx % adapter->num_tx_queues; + + return adapter->tx_ring[r_idx]; +} +#else +#define igb_tx_queue_mapping(_adapter, _skb) (_adapter)->tx_ring[0] +#endif + +static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, + struct net_device *netdev) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + + if (test_bit(__IGB_DOWN, &adapter->state)) { + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } + + if (skb->len <= 0) { + dev_kfree_skb_any(skb); + return NETDEV_TX_OK; + } + + /* + * The minimum packet size with TCTL.PSP set is 17 so pad the skb + * in order to meet this minimum size requirement. + */ + if (skb->len < 17) { + if (skb_padto(skb, 17)) + return NETDEV_TX_OK; + skb->len = 17; + } + + return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb)); +} + +/** + * igb_tx_timeout - Respond to a Tx Hang + * @netdev: network interface device structure + **/ +static void igb_tx_timeout(struct net_device *netdev) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + + /* Do the reset outside of interrupt context */ + adapter->tx_timeout_count++; + + if (hw->mac.type >= e1000_82580) + hw->dev_spec._82575.global_device_reset = true; + + schedule_work(&adapter->reset_task); + E1000_WRITE_REG(hw, E1000_EICS, + (adapter->eims_enable_mask & ~adapter->eims_other)); +} + +static void igb_reset_task(struct work_struct *work) +{ + struct igb_adapter *adapter; + adapter = container_of(work, struct igb_adapter, reset_task); + + igb_reinit_locked(adapter); +} + +/** + * igb_get_stats - Get System Network Statistics + * @netdev: network interface device structure + * + * Returns the address of the device statistics structure. + * The statistics are updated here and also from the timer callback. + **/ +static struct net_device_stats *igb_get_stats(struct net_device *netdev) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + + if (!test_bit(__IGB_RESETTING, &adapter->state)) + igb_update_stats(adapter); + +#ifdef HAVE_NETDEV_STATS_IN_NETDEV + /* only return the current stats */ + return &netdev->stats; +#else + /* only return the current stats */ + return &adapter->net_stats; +#endif /* HAVE_NETDEV_STATS_IN_NETDEV */ +} + +/** + * igb_change_mtu - Change the Maximum Transfer Unit + * @netdev: network interface device structure + * @new_mtu: new value for maximum frame size + * + * Returns 0 on success, negative on failure + **/ +static int igb_change_mtu(struct net_device *netdev, int new_mtu) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + struct pci_dev *pdev = adapter->pdev; + int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; + + if ((new_mtu < 68) || (max_frame > MAX_JUMBO_FRAME_SIZE)) { + dev_err(pci_dev_to_dev(pdev), "Invalid MTU setting\n"); + return -EINVAL; + } + +#define MAX_STD_JUMBO_FRAME_SIZE 9238 + if (max_frame > MAX_STD_JUMBO_FRAME_SIZE) { + dev_err(pci_dev_to_dev(pdev), "MTU > 9216 not supported.\n"); + return -EINVAL; + } + + /* adjust max frame to be at least the size of a standard frame */ + if (max_frame < (ETH_FRAME_LEN + ETH_FCS_LEN)) + max_frame = ETH_FRAME_LEN + ETH_FCS_LEN; + + while (test_and_set_bit(__IGB_RESETTING, &adapter->state)) + usleep_range(1000, 2000); + + /* igb_down has a dependency on max_frame_size */ + adapter->max_frame_size = max_frame; + + if (netif_running(netdev)) + igb_down(adapter); + + dev_info(pci_dev_to_dev(pdev), "changing MTU from %d to %d\n", + netdev->mtu, new_mtu); + netdev->mtu = new_mtu; + hw->dev_spec._82575.mtu = new_mtu; + + if (netif_running(netdev)) + igb_up(adapter); + else + igb_reset(adapter); + + clear_bit(__IGB_RESETTING, &adapter->state); + + return 0; +} + +/** + * igb_update_stats - Update the board statistics counters + * @adapter: board private structure + **/ + +void igb_update_stats(struct igb_adapter *adapter) +{ +#ifdef HAVE_NETDEV_STATS_IN_NETDEV + struct net_device_stats *net_stats = &adapter->netdev->stats; +#else + struct net_device_stats *net_stats = &adapter->net_stats; +#endif /* HAVE_NETDEV_STATS_IN_NETDEV */ + struct e1000_hw *hw = &adapter->hw; +#ifdef HAVE_PCI_ERS + struct pci_dev *pdev = adapter->pdev; +#endif + u32 reg, mpc; + u16 phy_tmp; + int i; + u64 bytes, packets; +#ifndef IGB_NO_LRO + u32 flushed = 0, coal = 0; + struct igb_q_vector *q_vector; +#endif + +#define PHY_IDLE_ERROR_COUNT_MASK 0x00FF + + /* + * Prevent stats update while adapter is being reset, or if the pci + * connection is down. + */ + if (adapter->link_speed == 0) + return; +#ifdef HAVE_PCI_ERS + if (pci_channel_offline(pdev)) + return; + +#endif +#ifndef IGB_NO_LRO + for (i = 0; i < adapter->num_q_vectors; i++) { + q_vector = adapter->q_vector[i]; + if (!q_vector) + continue; + flushed += q_vector->lrolist.stats.flushed; + coal += q_vector->lrolist.stats.coal; + } + adapter->lro_stats.flushed = flushed; + adapter->lro_stats.coal = coal; + +#endif + bytes = 0; + packets = 0; + for (i = 0; i < adapter->num_rx_queues; i++) { + u32 rqdpc_tmp = E1000_READ_REG(hw, E1000_RQDPC(i)) & 0x0FFF; + struct igb_ring *ring = adapter->rx_ring[i]; + ring->rx_stats.drops += rqdpc_tmp; + net_stats->rx_fifo_errors += rqdpc_tmp; +#ifdef CONFIG_IGB_VMDQ_NETDEV + if (!ring->vmdq_netdev) { + bytes += ring->rx_stats.bytes; + packets += ring->rx_stats.packets; + } +#else + bytes += ring->rx_stats.bytes; + packets += ring->rx_stats.packets; +#endif + } + + net_stats->rx_bytes = bytes; + net_stats->rx_packets = packets; + + bytes = 0; + packets = 0; + for (i = 0; i < adapter->num_tx_queues; i++) { + struct igb_ring *ring = adapter->tx_ring[i]; +#ifdef CONFIG_IGB_VMDQ_NETDEV + if (!ring->vmdq_netdev) { + bytes += ring->tx_stats.bytes; + packets += ring->tx_stats.packets; + } +#else + bytes += ring->tx_stats.bytes; + packets += ring->tx_stats.packets; +#endif + } + net_stats->tx_bytes = bytes; + net_stats->tx_packets = packets; + + /* read stats registers */ + adapter->stats.crcerrs += E1000_READ_REG(hw, E1000_CRCERRS); + adapter->stats.gprc += E1000_READ_REG(hw, E1000_GPRC); + adapter->stats.gorc += E1000_READ_REG(hw, E1000_GORCL); + E1000_READ_REG(hw, E1000_GORCH); /* clear GORCL */ + adapter->stats.bprc += E1000_READ_REG(hw, E1000_BPRC); + adapter->stats.mprc += E1000_READ_REG(hw, E1000_MPRC); + adapter->stats.roc += E1000_READ_REG(hw, E1000_ROC); + + adapter->stats.prc64 += E1000_READ_REG(hw, E1000_PRC64); + adapter->stats.prc127 += E1000_READ_REG(hw, E1000_PRC127); + adapter->stats.prc255 += E1000_READ_REG(hw, E1000_PRC255); + adapter->stats.prc511 += E1000_READ_REG(hw, E1000_PRC511); + adapter->stats.prc1023 += E1000_READ_REG(hw, E1000_PRC1023); + adapter->stats.prc1522 += E1000_READ_REG(hw, E1000_PRC1522); + adapter->stats.symerrs += E1000_READ_REG(hw, E1000_SYMERRS); + adapter->stats.sec += E1000_READ_REG(hw, E1000_SEC); + + mpc = E1000_READ_REG(hw, E1000_MPC); + adapter->stats.mpc += mpc; + net_stats->rx_fifo_errors += mpc; + adapter->stats.scc += E1000_READ_REG(hw, E1000_SCC); + adapter->stats.ecol += E1000_READ_REG(hw, E1000_ECOL); + adapter->stats.mcc += E1000_READ_REG(hw, E1000_MCC); + adapter->stats.latecol += E1000_READ_REG(hw, E1000_LATECOL); + adapter->stats.dc += E1000_READ_REG(hw, E1000_DC); + adapter->stats.rlec += E1000_READ_REG(hw, E1000_RLEC); + adapter->stats.xonrxc += E1000_READ_REG(hw, E1000_XONRXC); + adapter->stats.xontxc += E1000_READ_REG(hw, E1000_XONTXC); + adapter->stats.xoffrxc += E1000_READ_REG(hw, E1000_XOFFRXC); + adapter->stats.xofftxc += E1000_READ_REG(hw, E1000_XOFFTXC); + adapter->stats.fcruc += E1000_READ_REG(hw, E1000_FCRUC); + adapter->stats.gptc += E1000_READ_REG(hw, E1000_GPTC); + adapter->stats.gotc += E1000_READ_REG(hw, E1000_GOTCL); + E1000_READ_REG(hw, E1000_GOTCH); /* clear GOTCL */ + adapter->stats.rnbc += E1000_READ_REG(hw, E1000_RNBC); + adapter->stats.ruc += E1000_READ_REG(hw, E1000_RUC); + adapter->stats.rfc += E1000_READ_REG(hw, E1000_RFC); + adapter->stats.rjc += E1000_READ_REG(hw, E1000_RJC); + adapter->stats.tor += E1000_READ_REG(hw, E1000_TORH); + adapter->stats.tot += E1000_READ_REG(hw, E1000_TOTH); + adapter->stats.tpr += E1000_READ_REG(hw, E1000_TPR); + + adapter->stats.ptc64 += E1000_READ_REG(hw, E1000_PTC64); + adapter->stats.ptc127 += E1000_READ_REG(hw, E1000_PTC127); + adapter->stats.ptc255 += E1000_READ_REG(hw, E1000_PTC255); + adapter->stats.ptc511 += E1000_READ_REG(hw, E1000_PTC511); + adapter->stats.ptc1023 += E1000_READ_REG(hw, E1000_PTC1023); + adapter->stats.ptc1522 += E1000_READ_REG(hw, E1000_PTC1522); + + adapter->stats.mptc += E1000_READ_REG(hw, E1000_MPTC); + adapter->stats.bptc += E1000_READ_REG(hw, E1000_BPTC); + + adapter->stats.tpt += E1000_READ_REG(hw, E1000_TPT); + adapter->stats.colc += E1000_READ_REG(hw, E1000_COLC); + + adapter->stats.algnerrc += E1000_READ_REG(hw, E1000_ALGNERRC); + /* read internal phy sepecific stats */ + reg = E1000_READ_REG(hw, E1000_CTRL_EXT); + if (!(reg & E1000_CTRL_EXT_LINK_MODE_MASK)) { + adapter->stats.rxerrc += E1000_READ_REG(hw, E1000_RXERRC); + + /* this stat has invalid values on i210/i211 */ + if ((hw->mac.type != e1000_i210) && + (hw->mac.type != e1000_i211)) + adapter->stats.tncrs += E1000_READ_REG(hw, E1000_TNCRS); + } + adapter->stats.tsctc += E1000_READ_REG(hw, E1000_TSCTC); + adapter->stats.tsctfc += E1000_READ_REG(hw, E1000_TSCTFC); + + adapter->stats.iac += E1000_READ_REG(hw, E1000_IAC); + adapter->stats.icrxoc += E1000_READ_REG(hw, E1000_ICRXOC); + adapter->stats.icrxptc += E1000_READ_REG(hw, E1000_ICRXPTC); + adapter->stats.icrxatc += E1000_READ_REG(hw, E1000_ICRXATC); + adapter->stats.ictxptc += E1000_READ_REG(hw, E1000_ICTXPTC); + adapter->stats.ictxatc += E1000_READ_REG(hw, E1000_ICTXATC); + adapter->stats.ictxqec += E1000_READ_REG(hw, E1000_ICTXQEC); + adapter->stats.ictxqmtc += E1000_READ_REG(hw, E1000_ICTXQMTC); + adapter->stats.icrxdmtc += E1000_READ_REG(hw, E1000_ICRXDMTC); + + /* Fill out the OS statistics structure */ + net_stats->multicast = adapter->stats.mprc; + net_stats->collisions = adapter->stats.colc; + + /* Rx Errors */ + + /* RLEC on some newer hardware can be incorrect so build + * our own version based on RUC and ROC */ + net_stats->rx_errors = adapter->stats.rxerrc + + adapter->stats.crcerrs + adapter->stats.algnerrc + + adapter->stats.ruc + adapter->stats.roc + + adapter->stats.cexterr; + net_stats->rx_length_errors = adapter->stats.ruc + + adapter->stats.roc; + net_stats->rx_crc_errors = adapter->stats.crcerrs; + net_stats->rx_frame_errors = adapter->stats.algnerrc; + net_stats->rx_missed_errors = adapter->stats.mpc; + + /* Tx Errors */ + net_stats->tx_errors = adapter->stats.ecol + + adapter->stats.latecol; + net_stats->tx_aborted_errors = adapter->stats.ecol; + net_stats->tx_window_errors = adapter->stats.latecol; + net_stats->tx_carrier_errors = adapter->stats.tncrs; + + /* Tx Dropped needs to be maintained elsewhere */ + + /* Phy Stats */ + if (hw->phy.media_type == e1000_media_type_copper) { + if ((adapter->link_speed == SPEED_1000) && + (!e1000_read_phy_reg(hw, PHY_1000T_STATUS, &phy_tmp))) { + phy_tmp &= PHY_IDLE_ERROR_COUNT_MASK; + adapter->phy_stats.idle_errors += phy_tmp; + } + } + + /* Management Stats */ + adapter->stats.mgptc += E1000_READ_REG(hw, E1000_MGTPTC); + adapter->stats.mgprc += E1000_READ_REG(hw, E1000_MGTPRC); + if (hw->mac.type > e1000_82580) { + adapter->stats.o2bgptc += E1000_READ_REG(hw, E1000_O2BGPTC); + adapter->stats.o2bspc += E1000_READ_REG(hw, E1000_O2BSPC); + adapter->stats.b2ospc += E1000_READ_REG(hw, E1000_B2OSPC); + adapter->stats.b2ogprc += E1000_READ_REG(hw, E1000_B2OGPRC); + } +} + +static irqreturn_t igb_msix_other(int irq, void *data) +{ + struct igb_adapter *adapter = data; + struct e1000_hw *hw = &adapter->hw; + u32 icr = E1000_READ_REG(hw, E1000_ICR); + /* reading ICR causes bit 31 of EICR to be cleared */ + + if (icr & E1000_ICR_DRSTA) + schedule_work(&adapter->reset_task); + + if (icr & E1000_ICR_DOUTSYNC) { + /* HW is reporting DMA is out of sync */ + adapter->stats.doosync++; + /* The DMA Out of Sync is also indication of a spoof event + * in IOV mode. Check the Wrong VM Behavior register to + * see if it is really a spoof event. */ + igb_check_wvbr(adapter); + } + + /* Check for a mailbox event */ + if (icr & E1000_ICR_VMMB) + igb_msg_task(adapter); + + if (icr & E1000_ICR_LSC) { + hw->mac.get_link_status = 1; + /* guard against interrupt when we're going down */ + if (!test_bit(__IGB_DOWN, &adapter->state)) + mod_timer(&adapter->watchdog_timer, jiffies + 1); + } + +#ifdef HAVE_PTP_1588_CLOCK + if (icr & E1000_ICR_TS) { + u32 tsicr = E1000_READ_REG(hw, E1000_TSICR); + + if (tsicr & E1000_TSICR_TXTS) { + /* acknowledge the interrupt */ + E1000_WRITE_REG(hw, E1000_TSICR, E1000_TSICR_TXTS); + /* retrieve hardware timestamp */ + schedule_work(&adapter->ptp_tx_work); + } + } +#endif /* HAVE_PTP_1588_CLOCK */ + + /* Check for MDD event */ + if (icr & E1000_ICR_MDDET) + igb_process_mdd_event(adapter); + + E1000_WRITE_REG(hw, E1000_EIMS, adapter->eims_other); + + return IRQ_HANDLED; +} + +static void igb_write_itr(struct igb_q_vector *q_vector) +{ + struct igb_adapter *adapter = q_vector->adapter; + u32 itr_val = q_vector->itr_val & 0x7FFC; + + if (!q_vector->set_itr) + return; + + if (!itr_val) + itr_val = 0x4; + + if (adapter->hw.mac.type == e1000_82575) + itr_val |= itr_val << 16; + else + itr_val |= E1000_EITR_CNT_IGNR; + + writel(itr_val, q_vector->itr_register); + q_vector->set_itr = 0; +} + +static irqreturn_t igb_msix_ring(int irq, void *data) +{ + struct igb_q_vector *q_vector = data; + + /* Write the ITR value calculated from the previous interrupt. */ + igb_write_itr(q_vector); + + napi_schedule(&q_vector->napi); + + return IRQ_HANDLED; +} + +#ifdef IGB_DCA +static void igb_update_tx_dca(struct igb_adapter *adapter, + struct igb_ring *tx_ring, + int cpu) +{ + struct e1000_hw *hw = &adapter->hw; + u32 txctrl = dca3_get_tag(tx_ring->dev, cpu); + + if (hw->mac.type != e1000_82575) + txctrl <<= E1000_DCA_TXCTRL_CPUID_SHIFT_82576; + + /* + * We can enable relaxed ordering for reads, but not writes when + * DCA is enabled. This is due to a known issue in some chipsets + * which will cause the DCA tag to be cleared. + */ + txctrl |= E1000_DCA_TXCTRL_DESC_RRO_EN | + E1000_DCA_TXCTRL_DATA_RRO_EN | + E1000_DCA_TXCTRL_DESC_DCA_EN; + + E1000_WRITE_REG(hw, E1000_DCA_TXCTRL(tx_ring->reg_idx), txctrl); +} + +static void igb_update_rx_dca(struct igb_adapter *adapter, + struct igb_ring *rx_ring, + int cpu) +{ + struct e1000_hw *hw = &adapter->hw; + u32 rxctrl = dca3_get_tag(&adapter->pdev->dev, cpu); + + if (hw->mac.type != e1000_82575) + rxctrl <<= E1000_DCA_RXCTRL_CPUID_SHIFT_82576; + + /* + * We can enable relaxed ordering for reads, but not writes when + * DCA is enabled. This is due to a known issue in some chipsets + * which will cause the DCA tag to be cleared. + */ + rxctrl |= E1000_DCA_RXCTRL_DESC_RRO_EN | + E1000_DCA_RXCTRL_DESC_DCA_EN; + + E1000_WRITE_REG(hw, E1000_DCA_RXCTRL(rx_ring->reg_idx), rxctrl); +} + +static void igb_update_dca(struct igb_q_vector *q_vector) +{ + struct igb_adapter *adapter = q_vector->adapter; + int cpu = get_cpu(); + + if (q_vector->cpu == cpu) + goto out_no_update; + + if (q_vector->tx.ring) + igb_update_tx_dca(adapter, q_vector->tx.ring, cpu); + + if (q_vector->rx.ring) + igb_update_rx_dca(adapter, q_vector->rx.ring, cpu); + + q_vector->cpu = cpu; +out_no_update: + put_cpu(); +} + +static void igb_setup_dca(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + int i; + + if (!(adapter->flags & IGB_FLAG_DCA_ENABLED)) + return; + + /* Always use CB2 mode, difference is masked in the CB driver. */ + E1000_WRITE_REG(hw, E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_MODE_CB2); + + for (i = 0; i < adapter->num_q_vectors; i++) { + adapter->q_vector[i]->cpu = -1; + igb_update_dca(adapter->q_vector[i]); + } +} + +static int __igb_notify_dca(struct device *dev, void *data) +{ + struct net_device *netdev = dev_get_drvdata(dev); + struct igb_adapter *adapter = netdev_priv(netdev); + struct pci_dev *pdev = adapter->pdev; + struct e1000_hw *hw = &adapter->hw; + unsigned long event = *(unsigned long *)data; + + switch (event) { + case DCA_PROVIDER_ADD: + /* if already enabled, don't do it again */ + if (adapter->flags & IGB_FLAG_DCA_ENABLED) + break; + if (dca_add_requester(dev) == E1000_SUCCESS) { + adapter->flags |= IGB_FLAG_DCA_ENABLED; + dev_info(pci_dev_to_dev(pdev), "DCA enabled\n"); + igb_setup_dca(adapter); + break; + } + /* Fall Through since DCA is disabled. */ + case DCA_PROVIDER_REMOVE: + if (adapter->flags & IGB_FLAG_DCA_ENABLED) { + /* without this a class_device is left + * hanging around in the sysfs model */ + dca_remove_requester(dev); + dev_info(pci_dev_to_dev(pdev), "DCA disabled\n"); + adapter->flags &= ~IGB_FLAG_DCA_ENABLED; + E1000_WRITE_REG(hw, E1000_DCA_CTRL, E1000_DCA_CTRL_DCA_DISABLE); + } + break; + } + + return E1000_SUCCESS; +} + +static int igb_notify_dca(struct notifier_block *nb, unsigned long event, + void *p) +{ + int ret_val; + + ret_val = driver_for_each_device(&igb_driver.driver, NULL, &event, + __igb_notify_dca); + + return ret_val ? NOTIFY_BAD : NOTIFY_DONE; +} +#endif /* IGB_DCA */ + +static int igb_vf_configure(struct igb_adapter *adapter, int vf) +{ + unsigned char mac_addr[ETH_ALEN]; + + random_ether_addr(mac_addr); + igb_set_vf_mac(adapter, vf, mac_addr); + +#ifdef IFLA_VF_MAX +#ifdef HAVE_VF_SPOOFCHK_CONFIGURE + /* By default spoof check is enabled for all VFs */ + adapter->vf_data[vf].spoofchk_enabled = true; +#endif +#endif + + return true; +} + +static void igb_ping_all_vfs(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 ping; + int i; + + for (i = 0 ; i < adapter->vfs_allocated_count; i++) { + ping = E1000_PF_CONTROL_MSG; + if (adapter->vf_data[i].flags & IGB_VF_FLAG_CTS) + ping |= E1000_VT_MSGTYPE_CTS; + e1000_write_mbx(hw, &ping, 1, i); + } +} + +/** + * igb_mta_set_ - Set multicast filter table address + * @adapter: pointer to the adapter structure + * @hash_value: determines the MTA register and bit to set + * + * The multicast table address is a register array of 32-bit registers. + * The hash_value is used to determine what register the bit is in, the + * current value is read, the new bit is OR'd in and the new value is + * written back into the register. + **/ +void igb_mta_set(struct igb_adapter *adapter, u32 hash_value) +{ + struct e1000_hw *hw = &adapter->hw; + u32 hash_bit, hash_reg, mta; + + /* + * The MTA is a register array of 32-bit registers. It is + * treated like an array of (32*mta_reg_count) bits. We want to + * set bit BitArray[hash_value]. So we figure out what register + * the bit is in, read it, OR in the new bit, then write + * back the new value. The (hw->mac.mta_reg_count - 1) serves as a + * mask to bits 31:5 of the hash value which gives us the + * register we're modifying. The hash bit within that register + * is determined by the lower 5 bits of the hash value. + */ + hash_reg = (hash_value >> 5) & (hw->mac.mta_reg_count - 1); + hash_bit = hash_value & 0x1F; + + mta = E1000_READ_REG_ARRAY(hw, E1000_MTA, hash_reg); + + mta |= (1 << hash_bit); + + E1000_WRITE_REG_ARRAY(hw, E1000_MTA, hash_reg, mta); + E1000_WRITE_FLUSH(hw); +} + +static int igb_set_vf_promisc(struct igb_adapter *adapter, u32 *msgbuf, u32 vf) +{ + + struct e1000_hw *hw = &adapter->hw; + u32 vmolr = E1000_READ_REG(hw, E1000_VMOLR(vf)); + struct vf_data_storage *vf_data = &adapter->vf_data[vf]; + + vf_data->flags &= ~(IGB_VF_FLAG_UNI_PROMISC | + IGB_VF_FLAG_MULTI_PROMISC); + vmolr &= ~(E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE | E1000_VMOLR_MPME); + +#ifdef IGB_ENABLE_VF_PROMISC + if (*msgbuf & E1000_VF_SET_PROMISC_UNICAST) { + vmolr |= E1000_VMOLR_ROPE; + vf_data->flags |= IGB_VF_FLAG_UNI_PROMISC; + *msgbuf &= ~E1000_VF_SET_PROMISC_UNICAST; + } +#endif + if (*msgbuf & E1000_VF_SET_PROMISC_MULTICAST) { + vmolr |= E1000_VMOLR_MPME; + vf_data->flags |= IGB_VF_FLAG_MULTI_PROMISC; + *msgbuf &= ~E1000_VF_SET_PROMISC_MULTICAST; + } else { + /* + * if we have hashes and we are clearing a multicast promisc + * flag we need to write the hashes to the MTA as this step + * was previously skipped + */ + if (vf_data->num_vf_mc_hashes > 30) { + vmolr |= E1000_VMOLR_MPME; + } else if (vf_data->num_vf_mc_hashes) { + int j; + vmolr |= E1000_VMOLR_ROMPE; + for (j = 0; j < vf_data->num_vf_mc_hashes; j++) + igb_mta_set(adapter, vf_data->vf_mc_hashes[j]); + } + } + + E1000_WRITE_REG(hw, E1000_VMOLR(vf), vmolr); + + /* there are flags left unprocessed, likely not supported */ + if (*msgbuf & E1000_VT_MSGINFO_MASK) + return -EINVAL; + + return 0; + +} + +static int igb_set_vf_multicasts(struct igb_adapter *adapter, + u32 *msgbuf, u32 vf) +{ + int n = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT; + u16 *hash_list = (u16 *)&msgbuf[1]; + struct vf_data_storage *vf_data = &adapter->vf_data[vf]; + int i; + + /* salt away the number of multicast addresses assigned + * to this VF for later use to restore when the PF multi cast + * list changes + */ + vf_data->num_vf_mc_hashes = n; + + /* only up to 30 hash values supported */ + if (n > 30) + n = 30; + + /* store the hashes for later use */ + for (i = 0; i < n; i++) + vf_data->vf_mc_hashes[i] = hash_list[i]; + + /* Flush and reset the mta with the new values */ + igb_set_rx_mode(adapter->netdev); + + return 0; +} + +static void igb_restore_vf_multicasts(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + struct vf_data_storage *vf_data; + int i, j; + + for (i = 0; i < adapter->vfs_allocated_count; i++) { + u32 vmolr = E1000_READ_REG(hw, E1000_VMOLR(i)); + vmolr &= ~(E1000_VMOLR_ROMPE | E1000_VMOLR_MPME); + + vf_data = &adapter->vf_data[i]; + + if ((vf_data->num_vf_mc_hashes > 30) || + (vf_data->flags & IGB_VF_FLAG_MULTI_PROMISC)) { + vmolr |= E1000_VMOLR_MPME; + } else if (vf_data->num_vf_mc_hashes) { + vmolr |= E1000_VMOLR_ROMPE; + for (j = 0; j < vf_data->num_vf_mc_hashes; j++) + igb_mta_set(adapter, vf_data->vf_mc_hashes[j]); + } + E1000_WRITE_REG(hw, E1000_VMOLR(i), vmolr); + } +} + +static void igb_clear_vf_vfta(struct igb_adapter *adapter, u32 vf) +{ + struct e1000_hw *hw = &adapter->hw; + u32 pool_mask, reg, vid; + u16 vlan_default; + int i; + + pool_mask = 1 << (E1000_VLVF_POOLSEL_SHIFT + vf); + + /* Find the vlan filter for this id */ + for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) { + reg = E1000_READ_REG(hw, E1000_VLVF(i)); + + /* remove the vf from the pool */ + reg &= ~pool_mask; + + /* if pool is empty then remove entry from vfta */ + if (!(reg & E1000_VLVF_POOLSEL_MASK) && + (reg & E1000_VLVF_VLANID_ENABLE)) { + reg = 0; + vid = reg & E1000_VLVF_VLANID_MASK; + igb_vfta_set(adapter, vid, FALSE); + } + + E1000_WRITE_REG(hw, E1000_VLVF(i), reg); + } + + adapter->vf_data[vf].vlans_enabled = 0; + + vlan_default = adapter->vf_data[vf].default_vf_vlan_id; + if (vlan_default) + igb_vlvf_set(adapter, vlan_default, true, vf); +} + +s32 igb_vlvf_set(struct igb_adapter *adapter, u32 vid, bool add, u32 vf) +{ + struct e1000_hw *hw = &adapter->hw; + u32 reg, i; + + /* The vlvf table only exists on 82576 hardware and newer */ + if (hw->mac.type < e1000_82576) + return -1; + + /* we only need to do this if VMDq is enabled */ + if (!adapter->vmdq_pools) + return -1; + + /* Find the vlan filter for this id */ + for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) { + reg = E1000_READ_REG(hw, E1000_VLVF(i)); + if ((reg & E1000_VLVF_VLANID_ENABLE) && + vid == (reg & E1000_VLVF_VLANID_MASK)) + break; + } + + if (add) { + if (i == E1000_VLVF_ARRAY_SIZE) { + /* Did not find a matching VLAN ID entry that was + * enabled. Search for a free filter entry, i.e. + * one without the enable bit set + */ + for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) { + reg = E1000_READ_REG(hw, E1000_VLVF(i)); + if (!(reg & E1000_VLVF_VLANID_ENABLE)) + break; + } + } + if (i < E1000_VLVF_ARRAY_SIZE) { + /* Found an enabled/available entry */ + reg |= 1 << (E1000_VLVF_POOLSEL_SHIFT + vf); + + /* if !enabled we need to set this up in vfta */ + if (!(reg & E1000_VLVF_VLANID_ENABLE)) { + /* add VID to filter table */ + igb_vfta_set(adapter, vid, TRUE); + reg |= E1000_VLVF_VLANID_ENABLE; + } + reg &= ~E1000_VLVF_VLANID_MASK; + reg |= vid; + E1000_WRITE_REG(hw, E1000_VLVF(i), reg); + + /* do not modify RLPML for PF devices */ + if (vf >= adapter->vfs_allocated_count) + return E1000_SUCCESS; + + if (!adapter->vf_data[vf].vlans_enabled) { + u32 size; + reg = E1000_READ_REG(hw, E1000_VMOLR(vf)); + size = reg & E1000_VMOLR_RLPML_MASK; + size += 4; + reg &= ~E1000_VMOLR_RLPML_MASK; + reg |= size; + E1000_WRITE_REG(hw, E1000_VMOLR(vf), reg); + } + + adapter->vf_data[vf].vlans_enabled++; + } + } else { + if (i < E1000_VLVF_ARRAY_SIZE) { + /* remove vf from the pool */ + reg &= ~(1 << (E1000_VLVF_POOLSEL_SHIFT + vf)); + /* if pool is empty then remove entry from vfta */ + if (!(reg & E1000_VLVF_POOLSEL_MASK)) { + reg = 0; + igb_vfta_set(adapter, vid, FALSE); + } + E1000_WRITE_REG(hw, E1000_VLVF(i), reg); + + /* do not modify RLPML for PF devices */ + if (vf >= adapter->vfs_allocated_count) + return E1000_SUCCESS; + + adapter->vf_data[vf].vlans_enabled--; + if (!adapter->vf_data[vf].vlans_enabled) { + u32 size; + reg = E1000_READ_REG(hw, E1000_VMOLR(vf)); + size = reg & E1000_VMOLR_RLPML_MASK; + size -= 4; + reg &= ~E1000_VMOLR_RLPML_MASK; + reg |= size; + E1000_WRITE_REG(hw, E1000_VMOLR(vf), reg); + } + } + } + return E1000_SUCCESS; +} + +#ifdef IFLA_VF_MAX +static void igb_set_vmvir(struct igb_adapter *adapter, u32 vid, u32 vf) +{ + struct e1000_hw *hw = &adapter->hw; + + if (vid) + E1000_WRITE_REG(hw, E1000_VMVIR(vf), (vid | E1000_VMVIR_VLANA_DEFAULT)); + else + E1000_WRITE_REG(hw, E1000_VMVIR(vf), 0); +} + +static int igb_ndo_set_vf_vlan(struct net_device *netdev, + int vf, u16 vlan, u8 qos) +{ + int err = 0; + struct igb_adapter *adapter = netdev_priv(netdev); + + /* VLAN IDs accepted range 0-4094 */ + if ((vf >= adapter->vfs_allocated_count) || (vlan > VLAN_VID_MASK-1) || (qos > 7)) + return -EINVAL; + if (vlan || qos) { + err = igb_vlvf_set(adapter, vlan, !!vlan, vf); + if (err) + goto out; + igb_set_vmvir(adapter, vlan | (qos << VLAN_PRIO_SHIFT), vf); + igb_set_vmolr(adapter, vf, !vlan); + adapter->vf_data[vf].pf_vlan = vlan; + adapter->vf_data[vf].pf_qos = qos; + igb_set_vf_vlan_strip(adapter, vf, true); + dev_info(&adapter->pdev->dev, + "Setting VLAN %d, QOS 0x%x on VF %d\n", vlan, qos, vf); + if (test_bit(__IGB_DOWN, &adapter->state)) { + dev_warn(&adapter->pdev->dev, + "The VF VLAN has been set," + " but the PF device is not up.\n"); + dev_warn(&adapter->pdev->dev, + "Bring the PF device up before" + " attempting to use the VF device.\n"); + } + } else { + if (adapter->vf_data[vf].pf_vlan) + dev_info(&adapter->pdev->dev, + "Clearing VLAN on VF %d\n", vf); + igb_vlvf_set(adapter, adapter->vf_data[vf].pf_vlan, + false, vf); + igb_set_vmvir(adapter, vlan, vf); + igb_set_vmolr(adapter, vf, true); + igb_set_vf_vlan_strip(adapter, vf, false); + adapter->vf_data[vf].pf_vlan = 0; + adapter->vf_data[vf].pf_qos = 0; + } +out: + return err; +} + +#ifdef HAVE_VF_SPOOFCHK_CONFIGURE +static int igb_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, + bool setting) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + u32 dtxswc, reg_offset; + + if (!adapter->vfs_allocated_count) + return -EOPNOTSUPP; + + if (vf >= adapter->vfs_allocated_count) + return -EINVAL; + + reg_offset = (hw->mac.type == e1000_82576) ? E1000_DTXSWC : E1000_TXSWC; + dtxswc = E1000_READ_REG(hw, reg_offset); + if (setting) + dtxswc |= ((1 << vf) | + (1 << (vf + E1000_DTXSWC_VLAN_SPOOF_SHIFT))); + else + dtxswc &= ~((1 << vf) | + (1 << (vf + E1000_DTXSWC_VLAN_SPOOF_SHIFT))); + E1000_WRITE_REG(hw, reg_offset, dtxswc); + + adapter->vf_data[vf].spoofchk_enabled = setting; + return E1000_SUCCESS; +} +#endif /* HAVE_VF_SPOOFCHK_CONFIGURE */ +#endif /* IFLA_VF_MAX */ + +static int igb_find_vlvf_entry(struct igb_adapter *adapter, int vid) +{ + struct e1000_hw *hw = &adapter->hw; + int i; + u32 reg; + + /* Find the vlan filter for this id */ + for (i = 0; i < E1000_VLVF_ARRAY_SIZE; i++) { + reg = E1000_READ_REG(hw, E1000_VLVF(i)); + if ((reg & E1000_VLVF_VLANID_ENABLE) && + vid == (reg & E1000_VLVF_VLANID_MASK)) + break; + } + + if (i >= E1000_VLVF_ARRAY_SIZE) + i = -1; + + return i; +} + +static int igb_set_vf_vlan(struct igb_adapter *adapter, u32 *msgbuf, u32 vf) +{ + struct e1000_hw *hw = &adapter->hw; + int add = (msgbuf[0] & E1000_VT_MSGINFO_MASK) >> E1000_VT_MSGINFO_SHIFT; + int vid = (msgbuf[1] & E1000_VLVF_VLANID_MASK); + int err = 0; + + if (vid) + igb_set_vf_vlan_strip(adapter, vf, true); + else + igb_set_vf_vlan_strip(adapter, vf, false); + + /* If in promiscuous mode we need to make sure the PF also has + * the VLAN filter set. + */ + if (add && (adapter->netdev->flags & IFF_PROMISC)) + err = igb_vlvf_set(adapter, vid, add, + adapter->vfs_allocated_count); + if (err) + goto out; + + err = igb_vlvf_set(adapter, vid, add, vf); + + if (err) + goto out; + + /* Go through all the checks to see if the VLAN filter should + * be wiped completely. + */ + if (!add && (adapter->netdev->flags & IFF_PROMISC)) { + u32 vlvf, bits; + + int regndx = igb_find_vlvf_entry(adapter, vid); + if (regndx < 0) + goto out; + /* See if any other pools are set for this VLAN filter + * entry other than the PF. + */ + vlvf = bits = E1000_READ_REG(hw, E1000_VLVF(regndx)); + bits &= 1 << (E1000_VLVF_POOLSEL_SHIFT + + adapter->vfs_allocated_count); + /* If the filter was removed then ensure PF pool bit + * is cleared if the PF only added itself to the pool + * because the PF is in promiscuous mode. + */ + if ((vlvf & VLAN_VID_MASK) == vid && +#ifndef HAVE_VLAN_RX_REGISTER + !test_bit(vid, adapter->active_vlans) && +#endif + !bits) + igb_vlvf_set(adapter, vid, add, + adapter->vfs_allocated_count); + } + +out: + return err; +} + +static inline void igb_vf_reset(struct igb_adapter *adapter, u32 vf) +{ + struct e1000_hw *hw = &adapter->hw; + + /* clear flags except flag that the PF has set the MAC */ + adapter->vf_data[vf].flags &= IGB_VF_FLAG_PF_SET_MAC; + adapter->vf_data[vf].last_nack = jiffies; + + /* reset offloads to defaults */ + igb_set_vmolr(adapter, vf, true); + + /* reset vlans for device */ + igb_clear_vf_vfta(adapter, vf); +#ifdef IFLA_VF_MAX + if (adapter->vf_data[vf].pf_vlan) + igb_ndo_set_vf_vlan(adapter->netdev, vf, + adapter->vf_data[vf].pf_vlan, + adapter->vf_data[vf].pf_qos); + else + igb_clear_vf_vfta(adapter, vf); +#endif + + /* reset multicast table array for vf */ + adapter->vf_data[vf].num_vf_mc_hashes = 0; + + /* Flush and reset the mta with the new values */ + igb_set_rx_mode(adapter->netdev); + + /* + * Reset the VFs TDWBAL and TDWBAH registers which are not + * cleared by a VFLR + */ + E1000_WRITE_REG(hw, E1000_TDWBAH(vf), 0); + E1000_WRITE_REG(hw, E1000_TDWBAL(vf), 0); + if (hw->mac.type == e1000_82576) { + E1000_WRITE_REG(hw, E1000_TDWBAH(IGB_MAX_VF_FUNCTIONS + vf), 0); + E1000_WRITE_REG(hw, E1000_TDWBAL(IGB_MAX_VF_FUNCTIONS + vf), 0); + } +} + +static void igb_vf_reset_event(struct igb_adapter *adapter, u32 vf) +{ + unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses; + + /* generate a new mac address as we were hotplug removed/added */ + if (!(adapter->vf_data[vf].flags & IGB_VF_FLAG_PF_SET_MAC)) + random_ether_addr(vf_mac); + + /* process remaining reset events */ + igb_vf_reset(adapter, vf); +} + +static void igb_vf_reset_msg(struct igb_adapter *adapter, u32 vf) +{ + struct e1000_hw *hw = &adapter->hw; + unsigned char *vf_mac = adapter->vf_data[vf].vf_mac_addresses; + u32 reg, msgbuf[3]; + u8 *addr = (u8 *)(&msgbuf[1]); + + /* process all the same items cleared in a function level reset */ + igb_vf_reset(adapter, vf); + + /* set vf mac address */ + igb_del_mac_filter(adapter, vf_mac, vf); + igb_add_mac_filter(adapter, vf_mac, vf); + + /* enable transmit and receive for vf */ + reg = E1000_READ_REG(hw, E1000_VFTE); + E1000_WRITE_REG(hw, E1000_VFTE, reg | (1 << vf)); + reg = E1000_READ_REG(hw, E1000_VFRE); + E1000_WRITE_REG(hw, E1000_VFRE, reg | (1 << vf)); + + adapter->vf_data[vf].flags |= IGB_VF_FLAG_CTS; + + /* reply to reset with ack and vf mac address */ + msgbuf[0] = E1000_VF_RESET | E1000_VT_MSGTYPE_ACK; + memcpy(addr, vf_mac, 6); + e1000_write_mbx(hw, msgbuf, 3, vf); +} + +static int igb_set_vf_mac_addr(struct igb_adapter *adapter, u32 *msg, int vf) +{ + /* + * The VF MAC Address is stored in a packed array of bytes + * starting at the second 32 bit word of the msg array + */ + unsigned char *addr = (unsigned char *)&msg[1]; + int err = -1; + + if (is_valid_ether_addr(addr)) + err = igb_set_vf_mac(adapter, vf, addr); + + return err; +} + +static void igb_rcv_ack_from_vf(struct igb_adapter *adapter, u32 vf) +{ + struct e1000_hw *hw = &adapter->hw; + struct vf_data_storage *vf_data = &adapter->vf_data[vf]; + u32 msg = E1000_VT_MSGTYPE_NACK; + + /* if device isn't clear to send it shouldn't be reading either */ + if (!(vf_data->flags & IGB_VF_FLAG_CTS) && + time_after(jiffies, vf_data->last_nack + (2 * HZ))) { + e1000_write_mbx(hw, &msg, 1, vf); + vf_data->last_nack = jiffies; + } +} + +static void igb_rcv_msg_from_vf(struct igb_adapter *adapter, u32 vf) +{ + struct pci_dev *pdev = adapter->pdev; + u32 msgbuf[E1000_VFMAILBOX_SIZE]; + struct e1000_hw *hw = &adapter->hw; + struct vf_data_storage *vf_data = &adapter->vf_data[vf]; + s32 retval; + + retval = e1000_read_mbx(hw, msgbuf, E1000_VFMAILBOX_SIZE, vf); + + if (retval) { + dev_err(pci_dev_to_dev(pdev), "Error receiving message from VF\n"); + return; + } + + /* this is a message we already processed, do nothing */ + if (msgbuf[0] & (E1000_VT_MSGTYPE_ACK | E1000_VT_MSGTYPE_NACK)) + return; + + /* + * until the vf completes a reset it should not be + * allowed to start any configuration. + */ + + if (msgbuf[0] == E1000_VF_RESET) { + igb_vf_reset_msg(adapter, vf); + return; + } + + if (!(vf_data->flags & IGB_VF_FLAG_CTS)) { + msgbuf[0] = E1000_VT_MSGTYPE_NACK; + if (time_after(jiffies, vf_data->last_nack + (2 * HZ))) { + e1000_write_mbx(hw, msgbuf, 1, vf); + vf_data->last_nack = jiffies; + } + return; + } + + switch ((msgbuf[0] & 0xFFFF)) { + case E1000_VF_SET_MAC_ADDR: + retval = -EINVAL; +#ifndef IGB_DISABLE_VF_MAC_SET + if (!(vf_data->flags & IGB_VF_FLAG_PF_SET_MAC)) + retval = igb_set_vf_mac_addr(adapter, msgbuf, vf); + else + DPRINTK(DRV, INFO, + "VF %d attempted to override administratively " + "set MAC address\nReload the VF driver to " + "resume operations\n", vf); +#endif + break; + case E1000_VF_SET_PROMISC: + retval = igb_set_vf_promisc(adapter, msgbuf, vf); + break; + case E1000_VF_SET_MULTICAST: + retval = igb_set_vf_multicasts(adapter, msgbuf, vf); + break; + case E1000_VF_SET_LPE: + retval = igb_set_vf_rlpml(adapter, msgbuf[1], vf); + break; + case E1000_VF_SET_VLAN: + retval = -1; +#ifdef IFLA_VF_MAX + if (vf_data->pf_vlan) + DPRINTK(DRV, INFO, + "VF %d attempted to override administratively " + "set VLAN tag\nReload the VF driver to " + "resume operations\n", vf); + else +#endif + retval = igb_set_vf_vlan(adapter, msgbuf, vf); + break; + default: + dev_err(pci_dev_to_dev(pdev), "Unhandled Msg %08x\n", msgbuf[0]); + retval = -E1000_ERR_MBX; + break; + } + + /* notify the VF of the results of what it sent us */ + if (retval) + msgbuf[0] |= E1000_VT_MSGTYPE_NACK; + else + msgbuf[0] |= E1000_VT_MSGTYPE_ACK; + + msgbuf[0] |= E1000_VT_MSGTYPE_CTS; + + e1000_write_mbx(hw, msgbuf, 1, vf); +} + +static void igb_msg_task(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + u32 vf; + + for (vf = 0; vf < adapter->vfs_allocated_count; vf++) { + /* process any reset requests */ + if (!e1000_check_for_rst(hw, vf)) + igb_vf_reset_event(adapter, vf); + + /* process any messages pending */ + if (!e1000_check_for_msg(hw, vf)) + igb_rcv_msg_from_vf(adapter, vf); + + /* process any acks */ + if (!e1000_check_for_ack(hw, vf)) + igb_rcv_ack_from_vf(adapter, vf); + } +} + +/** + * igb_set_uta - Set unicast filter table address + * @adapter: board private structure + * + * The unicast table address is a register array of 32-bit registers. + * The table is meant to be used in a way similar to how the MTA is used + * however due to certain limitations in the hardware it is necessary to + * set all the hash bits to 1 and use the VMOLR ROPE bit as a promiscuous + * enable bit to allow vlan tag stripping when promiscuous mode is enabled + **/ +static void igb_set_uta(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + int i; + + /* The UTA table only exists on 82576 hardware and newer */ + if (hw->mac.type < e1000_82576) + return; + + /* we only need to do this if VMDq is enabled */ + if (!adapter->vmdq_pools) + return; + + for (i = 0; i < hw->mac.uta_reg_count; i++) + E1000_WRITE_REG_ARRAY(hw, E1000_UTA, i, ~0); +} + +/** + * igb_intr_msi - Interrupt Handler + * @irq: interrupt number + * @data: pointer to a network interface device structure + **/ +static irqreturn_t igb_intr_msi(int irq, void *data) +{ + struct igb_adapter *adapter = data; + struct igb_q_vector *q_vector = adapter->q_vector[0]; + struct e1000_hw *hw = &adapter->hw; + /* read ICR disables interrupts using IAM */ + u32 icr = E1000_READ_REG(hw, E1000_ICR); + + igb_write_itr(q_vector); + + if (icr & E1000_ICR_DRSTA) + schedule_work(&adapter->reset_task); + + if (icr & E1000_ICR_DOUTSYNC) { + /* HW is reporting DMA is out of sync */ + adapter->stats.doosync++; + } + + if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { + hw->mac.get_link_status = 1; + if (!test_bit(__IGB_DOWN, &adapter->state)) + mod_timer(&adapter->watchdog_timer, jiffies + 1); + } + +#ifdef HAVE_PTP_1588_CLOCK + if (icr & E1000_ICR_TS) { + u32 tsicr = E1000_READ_REG(hw, E1000_TSICR); + + if (tsicr & E1000_TSICR_TXTS) { + /* acknowledge the interrupt */ + E1000_WRITE_REG(hw, E1000_TSICR, E1000_TSICR_TXTS); + /* retrieve hardware timestamp */ + schedule_work(&adapter->ptp_tx_work); + } + } +#endif /* HAVE_PTP_1588_CLOCK */ + + napi_schedule(&q_vector->napi); + + return IRQ_HANDLED; +} + +/** + * igb_intr - Legacy Interrupt Handler + * @irq: interrupt number + * @data: pointer to a network interface device structure + **/ +static irqreturn_t igb_intr(int irq, void *data) +{ + struct igb_adapter *adapter = data; + struct igb_q_vector *q_vector = adapter->q_vector[0]; + struct e1000_hw *hw = &adapter->hw; + /* Interrupt Auto-Mask...upon reading ICR, interrupts are masked. No + * need for the IMC write */ + u32 icr = E1000_READ_REG(hw, E1000_ICR); + + /* IMS will not auto-mask if INT_ASSERTED is not set, and if it is + * not set, then the adapter didn't send an interrupt */ + if (!(icr & E1000_ICR_INT_ASSERTED)) + return IRQ_NONE; + + igb_write_itr(q_vector); + + if (icr & E1000_ICR_DRSTA) + schedule_work(&adapter->reset_task); + + if (icr & E1000_ICR_DOUTSYNC) { + /* HW is reporting DMA is out of sync */ + adapter->stats.doosync++; + } + + if (icr & (E1000_ICR_RXSEQ | E1000_ICR_LSC)) { + hw->mac.get_link_status = 1; + /* guard against interrupt when we're going down */ + if (!test_bit(__IGB_DOWN, &adapter->state)) + mod_timer(&adapter->watchdog_timer, jiffies + 1); + } + +#ifdef HAVE_PTP_1588_CLOCK + if (icr & E1000_ICR_TS) { + u32 tsicr = E1000_READ_REG(hw, E1000_TSICR); + + if (tsicr & E1000_TSICR_TXTS) { + /* acknowledge the interrupt */ + E1000_WRITE_REG(hw, E1000_TSICR, E1000_TSICR_TXTS); + /* retrieve hardware timestamp */ + schedule_work(&adapter->ptp_tx_work); + } + } +#endif /* HAVE_PTP_1588_CLOCK */ + + napi_schedule(&q_vector->napi); + + return IRQ_HANDLED; +} + +void igb_ring_irq_enable(struct igb_q_vector *q_vector) +{ + struct igb_adapter *adapter = q_vector->adapter; + struct e1000_hw *hw = &adapter->hw; + + if ((q_vector->rx.ring && (adapter->rx_itr_setting & 3)) || + (!q_vector->rx.ring && (adapter->tx_itr_setting & 3))) { + if ((adapter->num_q_vectors == 1) && !adapter->vf_data) + igb_set_itr(q_vector); + else + igb_update_ring_itr(q_vector); + } + + if (!test_bit(__IGB_DOWN, &adapter->state)) { + if (adapter->msix_entries) + E1000_WRITE_REG(hw, E1000_EIMS, q_vector->eims_value); + else + igb_irq_enable(adapter); + } +} + +/** + * igb_poll - NAPI Rx polling callback + * @napi: napi polling structure + * @budget: count of how many packets we should handle + **/ +static int igb_poll(struct napi_struct *napi, int budget) +{ + struct igb_q_vector *q_vector = container_of(napi, struct igb_q_vector, napi); + bool clean_complete = true; + +#ifdef IGB_DCA + if (q_vector->adapter->flags & IGB_FLAG_DCA_ENABLED) + igb_update_dca(q_vector); +#endif + if (q_vector->tx.ring) + clean_complete = igb_clean_tx_irq(q_vector); + + if (q_vector->rx.ring) + clean_complete &= igb_clean_rx_irq(q_vector, budget); + +#ifndef HAVE_NETDEV_NAPI_LIST + /* if netdev is disabled we need to stop polling */ + if (!netif_running(q_vector->adapter->netdev)) + clean_complete = true; + +#endif + /* If all work not completed, return budget and keep polling */ + if (!clean_complete) + return budget; + + /* If not enough Rx work done, exit the polling mode */ + napi_complete(napi); + igb_ring_irq_enable(q_vector); + + return 0; +} + +/** + * igb_clean_tx_irq - Reclaim resources after transmit completes + * @q_vector: pointer to q_vector containing needed info + * returns TRUE if ring is completely cleaned + **/ +static bool igb_clean_tx_irq(struct igb_q_vector *q_vector) +{ + struct igb_adapter *adapter = q_vector->adapter; + struct igb_ring *tx_ring = q_vector->tx.ring; + struct igb_tx_buffer *tx_buffer; + union e1000_adv_tx_desc *tx_desc; + unsigned int total_bytes = 0, total_packets = 0; + unsigned int budget = q_vector->tx.work_limit; + unsigned int i = tx_ring->next_to_clean; + + if (test_bit(__IGB_DOWN, &adapter->state)) + return true; + + tx_buffer = &tx_ring->tx_buffer_info[i]; + tx_desc = IGB_TX_DESC(tx_ring, i); + i -= tx_ring->count; + + do { + union e1000_adv_tx_desc *eop_desc = tx_buffer->next_to_watch; + + /* if next_to_watch is not set then there is no work pending */ + if (!eop_desc) + break; + + /* prevent any other reads prior to eop_desc */ + read_barrier_depends(); + + /* if DD is not set pending work has not been completed */ + if (!(eop_desc->wb.status & cpu_to_le32(E1000_TXD_STAT_DD))) + break; + + /* clear next_to_watch to prevent false hangs */ + tx_buffer->next_to_watch = NULL; + + /* update the statistics for this packet */ + total_bytes += tx_buffer->bytecount; + total_packets += tx_buffer->gso_segs; + + /* free the skb */ + dev_kfree_skb_any(tx_buffer->skb); + + /* unmap skb header data */ + dma_unmap_single(tx_ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + + /* clear tx_buffer data */ + tx_buffer->skb = NULL; + dma_unmap_len_set(tx_buffer, len, 0); + + /* clear last DMA location and unmap remaining buffers */ + while (tx_desc != eop_desc) { + tx_buffer++; + tx_desc++; + i++; + if (unlikely(!i)) { + i -= tx_ring->count; + tx_buffer = tx_ring->tx_buffer_info; + tx_desc = IGB_TX_DESC(tx_ring, 0); + } + + /* unmap any remaining paged data */ + if (dma_unmap_len(tx_buffer, len)) { + dma_unmap_page(tx_ring->dev, + dma_unmap_addr(tx_buffer, dma), + dma_unmap_len(tx_buffer, len), + DMA_TO_DEVICE); + dma_unmap_len_set(tx_buffer, len, 0); + } + } + + /* move us one more past the eop_desc for start of next pkt */ + tx_buffer++; + tx_desc++; + i++; + if (unlikely(!i)) { + i -= tx_ring->count; + tx_buffer = tx_ring->tx_buffer_info; + tx_desc = IGB_TX_DESC(tx_ring, 0); + } + + /* issue prefetch for next Tx descriptor */ + prefetch(tx_desc); + + /* update budget accounting */ + budget--; + } while (likely(budget)); + + netdev_tx_completed_queue(txring_txq(tx_ring), + total_packets, total_bytes); + + i += tx_ring->count; + tx_ring->next_to_clean = i; + tx_ring->tx_stats.bytes += total_bytes; + tx_ring->tx_stats.packets += total_packets; + q_vector->tx.total_bytes += total_bytes; + q_vector->tx.total_packets += total_packets; + +#ifdef DEBUG + if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags) && + !(adapter->disable_hw_reset && adapter->tx_hang_detected)) { +#else + if (test_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags)) { +#endif + struct e1000_hw *hw = &adapter->hw; + + /* Detect a transmit hang in hardware, this serializes the + * check with the clearing of time_stamp and movement of i */ + clear_bit(IGB_RING_FLAG_TX_DETECT_HANG, &tx_ring->flags); + if (tx_buffer->next_to_watch && + time_after(jiffies, tx_buffer->time_stamp + + (adapter->tx_timeout_factor * HZ)) + && !(E1000_READ_REG(hw, E1000_STATUS) & + E1000_STATUS_TXOFF)) { + + /* detected Tx unit hang */ +#ifdef DEBUG + adapter->tx_hang_detected = TRUE; + if (adapter->disable_hw_reset) { + DPRINTK(DRV, WARNING, + "Deactivating netdev watchdog timer\n"); + if (del_timer(&netdev_ring(tx_ring)->watchdog_timer)) + dev_put(netdev_ring(tx_ring)); +#ifndef HAVE_NET_DEVICE_OPS + netdev_ring(tx_ring)->tx_timeout = NULL; +#endif + } +#endif /* DEBUG */ + dev_err(tx_ring->dev, + "Detected Tx Unit Hang\n" + " Tx Queue <%d>\n" + " TDH <%x>\n" + " TDT <%x>\n" + " next_to_use <%x>\n" + " next_to_clean <%x>\n" + "buffer_info[next_to_clean]\n" + " time_stamp <%lx>\n" + " next_to_watch <%p>\n" + " jiffies <%lx>\n" + " desc.status <%x>\n", + tx_ring->queue_index, + E1000_READ_REG(hw, E1000_TDH(tx_ring->reg_idx)), + readl(tx_ring->tail), + tx_ring->next_to_use, + tx_ring->next_to_clean, + tx_buffer->time_stamp, + tx_buffer->next_to_watch, + jiffies, + tx_buffer->next_to_watch->wb.status); + if (netif_is_multiqueue(netdev_ring(tx_ring))) + netif_stop_subqueue(netdev_ring(tx_ring), + ring_queue_index(tx_ring)); + else + netif_stop_queue(netdev_ring(tx_ring)); + + /* we are about to reset, no point in enabling stuff */ + return true; + } + } + +#define TX_WAKE_THRESHOLD (DESC_NEEDED * 2) + if (unlikely(total_packets && + netif_carrier_ok(netdev_ring(tx_ring)) && + igb_desc_unused(tx_ring) >= TX_WAKE_THRESHOLD)) { + /* Make sure that anybody stopping the queue after this + * sees the new next_to_clean. + */ + smp_mb(); + if (netif_is_multiqueue(netdev_ring(tx_ring))) { + if (__netif_subqueue_stopped(netdev_ring(tx_ring), + ring_queue_index(tx_ring)) && + !(test_bit(__IGB_DOWN, &adapter->state))) { + netif_wake_subqueue(netdev_ring(tx_ring), + ring_queue_index(tx_ring)); + tx_ring->tx_stats.restart_queue++; + } + } else { + if (netif_queue_stopped(netdev_ring(tx_ring)) && + !(test_bit(__IGB_DOWN, &adapter->state))) { + netif_wake_queue(netdev_ring(tx_ring)); + tx_ring->tx_stats.restart_queue++; + } + } + } + + return !!budget; +} + +#ifdef HAVE_VLAN_RX_REGISTER +/** + * igb_receive_skb - helper function to handle rx indications + * @q_vector: structure containing interrupt and ring information + * @skb: packet to send up + **/ +static void igb_receive_skb(struct igb_q_vector *q_vector, + struct sk_buff *skb) +{ + struct vlan_group **vlgrp = netdev_priv(skb->dev); + + if (IGB_CB(skb)->vid) { + if (*vlgrp) { + vlan_gro_receive(&q_vector->napi, *vlgrp, + IGB_CB(skb)->vid, skb); + } else { + dev_kfree_skb_any(skb); + } + } else { + napi_gro_receive(&q_vector->napi, skb); + } +} + +#endif /* HAVE_VLAN_RX_REGISTER */ +#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT +/** + * igb_reuse_rx_page - page flip buffer and store it back on the ring + * @rx_ring: rx descriptor ring to store buffers on + * @old_buff: donor buffer to have page reused + * + * Synchronizes page for reuse by the adapter + **/ +static void igb_reuse_rx_page(struct igb_ring *rx_ring, + struct igb_rx_buffer *old_buff) +{ + struct igb_rx_buffer *new_buff; + u16 nta = rx_ring->next_to_alloc; + + new_buff = &rx_ring->rx_buffer_info[nta]; + + /* update, and store next to alloc */ + nta++; + rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0; + + /* transfer page from old buffer to new buffer */ + memcpy(new_buff, old_buff, sizeof(struct igb_rx_buffer)); + + /* sync the buffer for use by the device */ + dma_sync_single_range_for_device(rx_ring->dev, old_buff->dma, + old_buff->page_offset, + IGB_RX_BUFSZ, + DMA_FROM_DEVICE); +} + +static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer, + struct page *page, + unsigned int truesize) +{ + /* avoid re-using remote pages */ + if (unlikely(page_to_nid(page) != numa_node_id())) + return false; + +#if (PAGE_SIZE < 8192) + /* if we are only owner of page we can reuse it */ + if (unlikely(page_count(page) != 1)) + return false; + + /* flip page offset to other buffer */ + rx_buffer->page_offset ^= IGB_RX_BUFSZ; + +#else + /* move offset up to the next cache line */ + rx_buffer->page_offset += truesize; + + if (rx_buffer->page_offset > (PAGE_SIZE - IGB_RX_BUFSZ)) + return false; +#endif + + /* bump ref count on page before it is given to the stack */ + get_page(page); + + return true; +} + +/** + * igb_add_rx_frag - Add contents of Rx buffer to sk_buff + * @rx_ring: rx descriptor ring to transact packets on + * @rx_buffer: buffer containing page to add + * @rx_desc: descriptor containing length of buffer written by hardware + * @skb: sk_buff to place the data into + * + * This function will add the data contained in rx_buffer->page to the skb. + * This is done either through a direct copy if the data in the buffer is + * less than the skb header size, otherwise it will just attach the page as + * a frag to the skb. + * + * The function will then update the page offset if necessary and return + * true if the buffer can be reused by the adapter. + **/ +static bool igb_add_rx_frag(struct igb_ring *rx_ring, + struct igb_rx_buffer *rx_buffer, + union e1000_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + struct page *page = rx_buffer->page; + unsigned int size = le16_to_cpu(rx_desc->wb.upper.length); +#if (PAGE_SIZE < 8192) + unsigned int truesize = IGB_RX_BUFSZ; +#else + unsigned int truesize = ALIGN(size, L1_CACHE_BYTES); +#endif + + if ((size <= IGB_RX_HDR_LEN) && !skb_is_nonlinear(skb)) { + unsigned char *va = page_address(page) + rx_buffer->page_offset; + +#ifdef HAVE_PTP_1588_CLOCK + if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) { + igb_ptp_rx_pktstamp(rx_ring->q_vector, va, skb); + va += IGB_TS_HDR_LEN; + size -= IGB_TS_HDR_LEN; + } +#endif /* HAVE_PTP_1588_CLOCK */ + + memcpy(__skb_put(skb, size), va, ALIGN(size, sizeof(long))); + + /* we can reuse buffer as-is, just make sure it is local */ + if (likely(page_to_nid(page) == numa_node_id())) + return true; + + /* this page cannot be reused so discard it */ + put_page(page); + return false; + } + + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, + rx_buffer->page_offset, size, truesize); + + return igb_can_reuse_rx_page(rx_buffer, page, truesize); +} + +static struct sk_buff *igb_fetch_rx_buffer(struct igb_ring *rx_ring, + union e1000_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + struct igb_rx_buffer *rx_buffer; + struct page *page; + + rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; + + page = rx_buffer->page; + prefetchw(page); + + if (likely(!skb)) { + void *page_addr = page_address(page) + + rx_buffer->page_offset; + + /* prefetch first cache line of first page */ + prefetch(page_addr); +#if L1_CACHE_BYTES < 128 + prefetch(page_addr + L1_CACHE_BYTES); +#endif + + /* allocate a skb to store the frags */ + skb = netdev_alloc_skb_ip_align(rx_ring->netdev, + IGB_RX_HDR_LEN); + if (unlikely(!skb)) { + rx_ring->rx_stats.alloc_failed++; + return NULL; + } + + /* + * we will be copying header into skb->data in + * pskb_may_pull so it is in our interest to prefetch + * it now to avoid a possible cache miss + */ + prefetchw(skb->data); + } + + /* we are reusing so sync this buffer for CPU use */ + dma_sync_single_range_for_cpu(rx_ring->dev, + rx_buffer->dma, + rx_buffer->page_offset, + IGB_RX_BUFSZ, + DMA_FROM_DEVICE); + + /* pull page into skb */ + if (igb_add_rx_frag(rx_ring, rx_buffer, rx_desc, skb)) { + /* hand second half of page back to the ring */ + igb_reuse_rx_page(rx_ring, rx_buffer); + } else { + /* we are not reusing the buffer so unmap it */ + dma_unmap_page(rx_ring->dev, rx_buffer->dma, + PAGE_SIZE, DMA_FROM_DEVICE); + } + + /* clear contents of rx_buffer */ + rx_buffer->page = NULL; + + return skb; +} + +#endif +static inline void igb_rx_checksum(struct igb_ring *ring, + union e1000_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + skb_checksum_none_assert(skb); + + /* Ignore Checksum bit is set */ + if (igb_test_staterr(rx_desc, E1000_RXD_STAT_IXSM)) + return; + + /* Rx checksum disabled via ethtool */ + if (!(netdev_ring(ring)->features & NETIF_F_RXCSUM)) + return; + + /* TCP/UDP checksum error bit is set */ + if (igb_test_staterr(rx_desc, + E1000_RXDEXT_STATERR_TCPE | + E1000_RXDEXT_STATERR_IPE)) { + /* + * work around errata with sctp packets where the TCPE aka + * L4E bit is set incorrectly on 64 byte (60 byte w/o crc) + * packets, (aka let the stack check the crc32c) + */ + if (!((skb->len == 60) && + test_bit(IGB_RING_FLAG_RX_SCTP_CSUM, &ring->flags))) + ring->rx_stats.csum_err++; + + /* let the stack verify checksum errors */ + return; + } + /* It must be a TCP or UDP packet with a valid checksum */ + if (igb_test_staterr(rx_desc, E1000_RXD_STAT_TCPCS | + E1000_RXD_STAT_UDPCS)) + skb->ip_summed = CHECKSUM_UNNECESSARY; +} + +#ifdef NETIF_F_RXHASH +static inline void igb_rx_hash(struct igb_ring *ring, + union e1000_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + if (netdev_ring(ring)->features & NETIF_F_RXHASH) + skb_set_hash(skb, le32_to_cpu(rx_desc->wb.lower.hi_dword.rss), + PKT_HASH_TYPE_L3); +} + +#endif +#ifndef IGB_NO_LRO +#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT +/** + * igb_merge_active_tail - merge active tail into lro skb + * @tail: pointer to active tail in frag_list + * + * This function merges the length and data of an active tail into the + * skb containing the frag_list. It resets the tail's pointer to the head, + * but it leaves the heads pointer to tail intact. + **/ +static inline struct sk_buff *igb_merge_active_tail(struct sk_buff *tail) +{ + struct sk_buff *head = IGB_CB(tail)->head; + + if (!head) + return tail; + + head->len += tail->len; + head->data_len += tail->len; + head->truesize += tail->len; + + IGB_CB(tail)->head = NULL; + + return head; +} + +/** + * igb_add_active_tail - adds an active tail into the skb frag_list + * @head: pointer to the start of the skb + * @tail: pointer to active tail to add to frag_list + * + * This function adds an active tail to the end of the frag list. This tail + * will still be receiving data so we cannot yet ad it's stats to the main + * skb. That is done via igb_merge_active_tail. + **/ +static inline void igb_add_active_tail(struct sk_buff *head, struct sk_buff *tail) +{ + struct sk_buff *old_tail = IGB_CB(head)->tail; + + if (old_tail) { + igb_merge_active_tail(old_tail); + old_tail->next = tail; + } else { + skb_shinfo(head)->frag_list = tail; + } + + IGB_CB(tail)->head = head; + IGB_CB(head)->tail = tail; + + IGB_CB(head)->append_cnt++; +} + +/** + * igb_close_active_frag_list - cleanup pointers on a frag_list skb + * @head: pointer to head of an active frag list + * + * This function will clear the frag_tail_tracker pointer on an active + * frag_list and returns true if the pointer was actually set + **/ +static inline bool igb_close_active_frag_list(struct sk_buff *head) +{ + struct sk_buff *tail = IGB_CB(head)->tail; + + if (!tail) + return false; + + igb_merge_active_tail(tail); + + IGB_CB(head)->tail = NULL; + + return true; +} + +#endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */ +/** + * igb_can_lro - returns true if packet is TCP/IPV4 and LRO is enabled + * @adapter: board private structure + * @rx_desc: pointer to the rx descriptor + * @skb: pointer to the skb to be merged + * + **/ +static inline bool igb_can_lro(struct igb_ring *rx_ring, + union e1000_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + struct iphdr *iph = (struct iphdr *)skb->data; + __le16 pkt_info = rx_desc->wb.lower.lo_dword.hs_rss.pkt_info; + + /* verify hardware indicates this is IPv4/TCP */ + if((!(pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_TCP)) || + !(pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_IPV4)))) + return false; + + /* .. and LRO is enabled */ + if (!(netdev_ring(rx_ring)->features & NETIF_F_LRO)) + return false; + + /* .. and we are not in promiscuous mode */ + if (netdev_ring(rx_ring)->flags & IFF_PROMISC) + return false; + + /* .. and the header is large enough for us to read IP/TCP fields */ + if (!pskb_may_pull(skb, sizeof(struct igb_lrohdr))) + return false; + + /* .. and there are no VLANs on packet */ + if (skb->protocol != __constant_htons(ETH_P_IP)) + return false; + + /* .. and we are version 4 with no options */ + if (*(u8 *)iph != 0x45) + return false; + + /* .. and the packet is not fragmented */ + if (iph->frag_off & htons(IP_MF | IP_OFFSET)) + return false; + + /* .. and that next header is TCP */ + if (iph->protocol != IPPROTO_TCP) + return false; + + return true; +} + +static inline struct igb_lrohdr *igb_lro_hdr(struct sk_buff *skb) +{ + return (struct igb_lrohdr *)skb->data; +} + +/** + * igb_lro_flush - Indicate packets to upper layer. + * + * Update IP and TCP header part of head skb if more than one + * skb's chained and indicate packets to upper layer. + **/ +static void igb_lro_flush(struct igb_q_vector *q_vector, + struct sk_buff *skb) +{ + struct igb_lro_list *lrolist = &q_vector->lrolist; + + __skb_unlink(skb, &lrolist->active); + + if (IGB_CB(skb)->append_cnt) { + struct igb_lrohdr *lroh = igb_lro_hdr(skb); + +#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT + /* close any active lro contexts */ + igb_close_active_frag_list(skb); + +#endif + /* incorporate ip header and re-calculate checksum */ + lroh->iph.tot_len = ntohs(skb->len); + lroh->iph.check = 0; + + /* header length is 5 since we know no options exist */ + lroh->iph.check = ip_fast_csum((u8 *)lroh, 5); + + /* clear TCP checksum to indicate we are an LRO frame */ + lroh->th.check = 0; + + /* incorporate latest timestamp into the tcp header */ + if (IGB_CB(skb)->tsecr) { + lroh->ts[2] = IGB_CB(skb)->tsecr; + lroh->ts[1] = htonl(IGB_CB(skb)->tsval); + } +#ifdef NETIF_F_GSO + + skb_shinfo(skb)->gso_size = IGB_CB(skb)->mss; + skb_shinfo(skb)->gso_type = SKB_GSO_TCPV4; +#endif + } + +#ifdef HAVE_VLAN_RX_REGISTER + igb_receive_skb(q_vector, skb); +#else + napi_gro_receive(&q_vector->napi, skb); +#endif + lrolist->stats.flushed++; +} + +static void igb_lro_flush_all(struct igb_q_vector *q_vector) +{ + struct igb_lro_list *lrolist = &q_vector->lrolist; + struct sk_buff *skb, *tmp; + + skb_queue_reverse_walk_safe(&lrolist->active, skb, tmp) + igb_lro_flush(q_vector, skb); +} + +/* + * igb_lro_header_ok - Main LRO function. + **/ +static void igb_lro_header_ok(struct sk_buff *skb) +{ + struct igb_lrohdr *lroh = igb_lro_hdr(skb); + u16 opt_bytes, data_len; + +#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT + IGB_CB(skb)->tail = NULL; +#endif + IGB_CB(skb)->tsecr = 0; + IGB_CB(skb)->append_cnt = 0; + IGB_CB(skb)->mss = 0; + + /* ensure that the checksum is valid */ + if (skb->ip_summed != CHECKSUM_UNNECESSARY) + return; + + /* If we see CE codepoint in IP header, packet is not mergeable */ + if (INET_ECN_is_ce(ipv4_get_dsfield(&lroh->iph))) + return; + + /* ensure no bits set besides ack or psh */ + if (lroh->th.fin || lroh->th.syn || lroh->th.rst || + lroh->th.urg || lroh->th.ece || lroh->th.cwr || + !lroh->th.ack) + return; + + /* store the total packet length */ + data_len = ntohs(lroh->iph.tot_len); + + /* remove any padding from the end of the skb */ + __pskb_trim(skb, data_len); + + /* remove header length from data length */ + data_len -= sizeof(struct igb_lrohdr); + + /* + * check for timestamps. Since the only option we handle are timestamps, + * we only have to handle the simple case of aligned timestamps + */ + opt_bytes = (lroh->th.doff << 2) - sizeof(struct tcphdr); + if (opt_bytes != 0) { + if ((opt_bytes != TCPOLEN_TSTAMP_ALIGNED) || + !pskb_may_pull(skb, sizeof(struct igb_lrohdr) + + TCPOLEN_TSTAMP_ALIGNED) || + (lroh->ts[0] != htonl((TCPOPT_NOP << 24) | + (TCPOPT_NOP << 16) | + (TCPOPT_TIMESTAMP << 8) | + TCPOLEN_TIMESTAMP)) || + (lroh->ts[2] == 0)) { + return; + } + + IGB_CB(skb)->tsval = ntohl(lroh->ts[1]); + IGB_CB(skb)->tsecr = lroh->ts[2]; + + data_len -= TCPOLEN_TSTAMP_ALIGNED; + } + + /* record data_len as mss for the packet */ + IGB_CB(skb)->mss = data_len; + IGB_CB(skb)->next_seq = ntohl(lroh->th.seq); +} + +#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT +static void igb_merge_frags(struct sk_buff *lro_skb, struct sk_buff *new_skb) +{ + struct skb_shared_info *sh_info; + struct skb_shared_info *new_skb_info; + unsigned int data_len; + + sh_info = skb_shinfo(lro_skb); + new_skb_info = skb_shinfo(new_skb); + + /* copy frags into the last skb */ + memcpy(sh_info->frags + sh_info->nr_frags, + new_skb_info->frags, + new_skb_info->nr_frags * sizeof(skb_frag_t)); + + /* copy size data over */ + sh_info->nr_frags += new_skb_info->nr_frags; + data_len = IGB_CB(new_skb)->mss; + lro_skb->len += data_len; + lro_skb->data_len += data_len; + lro_skb->truesize += data_len; + + /* wipe record of data from new_skb */ + new_skb_info->nr_frags = 0; + new_skb->len = new_skb->data_len = 0; + dev_kfree_skb_any(new_skb); +} + +#endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */ +/** + * igb_lro_receive - if able, queue skb into lro chain + * @q_vector: structure containing interrupt and ring information + * @new_skb: pointer to current skb being checked + * + * Checks whether the skb given is eligible for LRO and if that's + * fine chains it to the existing lro_skb based on flowid. If an LRO for + * the flow doesn't exist create one. + **/ +static void igb_lro_receive(struct igb_q_vector *q_vector, + struct sk_buff *new_skb) +{ + struct sk_buff *lro_skb; + struct igb_lro_list *lrolist = &q_vector->lrolist; + struct igb_lrohdr *lroh = igb_lro_hdr(new_skb); + __be32 saddr = lroh->iph.saddr; + __be32 daddr = lroh->iph.daddr; + __be32 tcp_ports = *(__be32 *)&lroh->th; + u16 data_len; +#ifdef HAVE_VLAN_RX_REGISTER + u16 vid = IGB_CB(new_skb)->vid; +#else + u16 vid = new_skb->vlan_tci; +#endif + + igb_lro_header_ok(new_skb); + + /* + * we have a packet that might be eligible for LRO, + * so see if it matches anything we might expect + */ + skb_queue_walk(&lrolist->active, lro_skb) { + if (*(__be32 *)&igb_lro_hdr(lro_skb)->th != tcp_ports || + igb_lro_hdr(lro_skb)->iph.saddr != saddr || + igb_lro_hdr(lro_skb)->iph.daddr != daddr) + continue; + +#ifdef HAVE_VLAN_RX_REGISTER + if (IGB_CB(lro_skb)->vid != vid) +#else + if (lro_skb->vlan_tci != vid) +#endif + continue; + + /* out of order packet */ + if (IGB_CB(lro_skb)->next_seq != IGB_CB(new_skb)->next_seq) { + igb_lro_flush(q_vector, lro_skb); + IGB_CB(new_skb)->mss = 0; + break; + } + + /* TCP timestamp options have changed */ + if (!IGB_CB(lro_skb)->tsecr != !IGB_CB(new_skb)->tsecr) { + igb_lro_flush(q_vector, lro_skb); + break; + } + + /* make sure timestamp values are increasing */ + if (IGB_CB(lro_skb)->tsecr && + IGB_CB(lro_skb)->tsval > IGB_CB(new_skb)->tsval) { + igb_lro_flush(q_vector, lro_skb); + IGB_CB(new_skb)->mss = 0; + break; + } + + data_len = IGB_CB(new_skb)->mss; + + /* Check for all of the above below + * malformed header + * no tcp data + * resultant packet would be too large + * new skb is larger than our current mss + * data would remain in header + * we would consume more frags then the sk_buff contains + * ack sequence numbers changed + * window size has changed + */ + if (data_len == 0 || + data_len > IGB_CB(lro_skb)->mss || + data_len > IGB_CB(lro_skb)->free || +#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT + data_len != new_skb->data_len || + skb_shinfo(new_skb)->nr_frags >= + (MAX_SKB_FRAGS - skb_shinfo(lro_skb)->nr_frags) || +#endif + igb_lro_hdr(lro_skb)->th.ack_seq != lroh->th.ack_seq || + igb_lro_hdr(lro_skb)->th.window != lroh->th.window) { + igb_lro_flush(q_vector, lro_skb); + break; + } + + /* Remove IP and TCP header*/ + skb_pull(new_skb, new_skb->len - data_len); + + /* update timestamp and timestamp echo response */ + IGB_CB(lro_skb)->tsval = IGB_CB(new_skb)->tsval; + IGB_CB(lro_skb)->tsecr = IGB_CB(new_skb)->tsecr; + + /* update sequence and free space */ + IGB_CB(lro_skb)->next_seq += data_len; + IGB_CB(lro_skb)->free -= data_len; + + /* update append_cnt */ + IGB_CB(lro_skb)->append_cnt++; + +#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT + /* if header is empty pull pages into current skb */ + igb_merge_frags(lro_skb, new_skb); +#else + /* chain this new skb in frag_list */ + igb_add_active_tail(lro_skb, new_skb); +#endif + + if ((data_len < IGB_CB(lro_skb)->mss) || lroh->th.psh || + skb_shinfo(lro_skb)->nr_frags == MAX_SKB_FRAGS) { + igb_lro_hdr(lro_skb)->th.psh |= lroh->th.psh; + igb_lro_flush(q_vector, lro_skb); + } + + lrolist->stats.coal++; + return; + } + + if (IGB_CB(new_skb)->mss && !lroh->th.psh) { + /* if we are at capacity flush the tail */ + if (skb_queue_len(&lrolist->active) >= IGB_LRO_MAX) { + lro_skb = skb_peek_tail(&lrolist->active); + if (lro_skb) + igb_lro_flush(q_vector, lro_skb); + } + + /* update sequence and free space */ + IGB_CB(new_skb)->next_seq += IGB_CB(new_skb)->mss; + IGB_CB(new_skb)->free = 65521 - new_skb->len; + + /* .. and insert at the front of the active list */ + __skb_queue_head(&lrolist->active, new_skb); + + lrolist->stats.coal++; + return; + } + + /* packet not handled by any of the above, pass it to the stack */ +#ifdef HAVE_VLAN_RX_REGISTER + igb_receive_skb(q_vector, new_skb); +#else + napi_gro_receive(&q_vector->napi, new_skb); +#endif +} + +#endif /* IGB_NO_LRO */ +/** + * igb_process_skb_fields - Populate skb header fields from Rx descriptor + * @rx_ring: rx descriptor ring packet is being transacted on + * @rx_desc: pointer to the EOP Rx descriptor + * @skb: pointer to current skb being populated + * + * This function checks the ring, descriptor, and packet information in + * order to populate the hash, checksum, VLAN, timestamp, protocol, and + * other fields within the skb. + **/ +static void igb_process_skb_fields(struct igb_ring *rx_ring, + union e1000_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + struct net_device *dev = rx_ring->netdev; + __le16 pkt_info = rx_desc->wb.lower.lo_dword.hs_rss.pkt_info; + +#ifdef NETIF_F_RXHASH + igb_rx_hash(rx_ring, rx_desc, skb); + +#endif + igb_rx_checksum(rx_ring, rx_desc, skb); + + /* update packet type stats */ + if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_IPV4)) + rx_ring->rx_stats.ipv4_packets++; + else if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_IPV4_EX)) + rx_ring->rx_stats.ipv4e_packets++; + else if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_IPV6)) + rx_ring->rx_stats.ipv6_packets++; + else if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_IPV6_EX)) + rx_ring->rx_stats.ipv6e_packets++; + else if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_TCP)) + rx_ring->rx_stats.tcp_packets++; + else if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_UDP)) + rx_ring->rx_stats.udp_packets++; + else if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_SCTP)) + rx_ring->rx_stats.sctp_packets++; + else if (pkt_info & cpu_to_le16(E1000_RXDADV_PKTTYPE_NFS)) + rx_ring->rx_stats.nfs_packets++; + +#ifdef HAVE_PTP_1588_CLOCK + igb_ptp_rx_hwtstamp(rx_ring, rx_desc, skb); +#endif /* HAVE_PTP_1588_CLOCK */ + +#ifdef NETIF_F_HW_VLAN_CTAG_RX + if ((dev->features & NETIF_F_HW_VLAN_CTAG_RX) && +#else + if ((dev->features & NETIF_F_HW_VLAN_RX) && +#endif + igb_test_staterr(rx_desc, E1000_RXD_STAT_VP)) { + u16 vid = 0; + if (igb_test_staterr(rx_desc, E1000_RXDEXT_STATERR_LB) && + test_bit(IGB_RING_FLAG_RX_LB_VLAN_BSWAP, &rx_ring->flags)) + vid = be16_to_cpu(rx_desc->wb.upper.vlan); + else + vid = le16_to_cpu(rx_desc->wb.upper.vlan); +#ifdef HAVE_VLAN_RX_REGISTER + IGB_CB(skb)->vid = vid; + } else { + IGB_CB(skb)->vid = 0; +#else + +#ifdef HAVE_VLAN_PROTOCOL + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); +#else + __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vid); +#endif + + +#endif + } + + skb_record_rx_queue(skb, rx_ring->queue_index); + + skb->protocol = eth_type_trans(skb, dev); +} + +/** + * igb_is_non_eop - process handling of non-EOP buffers + * @rx_ring: Rx ring being processed + * @rx_desc: Rx descriptor for current buffer + * + * This function updates next to clean. If the buffer is an EOP buffer + * this function exits returning false, otherwise it will place the + * sk_buff in the next buffer to be chained and return true indicating + * that this is in fact a non-EOP buffer. + **/ +static bool igb_is_non_eop(struct igb_ring *rx_ring, + union e1000_adv_rx_desc *rx_desc) +{ + u32 ntc = rx_ring->next_to_clean + 1; + + /* fetch, update, and store next to clean */ + ntc = (ntc < rx_ring->count) ? ntc : 0; + rx_ring->next_to_clean = ntc; + + prefetch(IGB_RX_DESC(rx_ring, ntc)); + + if (likely(igb_test_staterr(rx_desc, E1000_RXD_STAT_EOP))) + return false; + + return true; +} + +#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT +/* igb_clean_rx_irq -- * legacy */ +static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget) +{ + struct igb_ring *rx_ring = q_vector->rx.ring; + unsigned int total_bytes = 0, total_packets = 0; + u16 cleaned_count = igb_desc_unused(rx_ring); + + do { + struct igb_rx_buffer *rx_buffer; + union e1000_adv_rx_desc *rx_desc; + struct sk_buff *skb; + u16 ntc; + + /* return some buffers to hardware, one at a time is too slow */ + if (cleaned_count >= IGB_RX_BUFFER_WRITE) { + igb_alloc_rx_buffers(rx_ring, cleaned_count); + cleaned_count = 0; + } + + ntc = rx_ring->next_to_clean; + rx_desc = IGB_RX_DESC(rx_ring, ntc); + rx_buffer = &rx_ring->rx_buffer_info[ntc]; + + if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) + break; + + /* + * This memory barrier is needed to keep us from reading + * any other fields out of the rx_desc until we know the + * RXD_STAT_DD bit is set + */ + rmb(); + + skb = rx_buffer->skb; + + prefetch(skb->data); + + /* pull the header of the skb in */ + __skb_put(skb, le16_to_cpu(rx_desc->wb.upper.length)); + + /* clear skb reference in buffer info structure */ + rx_buffer->skb = NULL; + + cleaned_count++; + + BUG_ON(igb_is_non_eop(rx_ring, rx_desc)); + + dma_unmap_single(rx_ring->dev, rx_buffer->dma, + rx_ring->rx_buffer_len, + DMA_FROM_DEVICE); + rx_buffer->dma = 0; + + if (igb_test_staterr(rx_desc, + E1000_RXDEXT_ERR_FRAME_ERR_MASK)) { + dev_kfree_skb_any(skb); + continue; + } + + total_bytes += skb->len; + + /* populate checksum, timestamp, VLAN, and protocol */ + igb_process_skb_fields(rx_ring, rx_desc, skb); + +#ifndef IGB_NO_LRO + if (igb_can_lro(rx_ring, rx_desc, skb)) + igb_lro_receive(q_vector, skb); + else +#endif +#ifdef HAVE_VLAN_RX_REGISTER + igb_receive_skb(q_vector, skb); +#else + napi_gro_receive(&q_vector->napi, skb); +#endif + +#ifndef NETIF_F_GRO + netdev_ring(rx_ring)->last_rx = jiffies; + +#endif + /* update budget accounting */ + total_packets++; + } while (likely(total_packets < budget)); + + rx_ring->rx_stats.packets += total_packets; + rx_ring->rx_stats.bytes += total_bytes; + q_vector->rx.total_packets += total_packets; + q_vector->rx.total_bytes += total_bytes; + + if (cleaned_count) + igb_alloc_rx_buffers(rx_ring, cleaned_count); + +#ifndef IGB_NO_LRO + igb_lro_flush_all(q_vector); + +#endif /* IGB_NO_LRO */ + return total_packets < budget; +} +#else /* CONFIG_IGB_DISABLE_PACKET_SPLIT */ +/** + * igb_get_headlen - determine size of header for LRO/GRO + * @data: pointer to the start of the headers + * @max_len: total length of section to find headers in + * + * This function is meant to determine the length of headers that will + * be recognized by hardware for LRO, and GRO offloads. The main + * motivation of doing this is to only perform one pull for IPv4 TCP + * packets so that we can do basic things like calculating the gso_size + * based on the average data per packet. + **/ +static unsigned int igb_get_headlen(unsigned char *data, + unsigned int max_len) +{ + union { + unsigned char *network; + /* l2 headers */ + struct ethhdr *eth; + struct vlan_hdr *vlan; + /* l3 headers */ + struct iphdr *ipv4; + struct ipv6hdr *ipv6; + } hdr; + __be16 protocol; + u8 nexthdr = 0; /* default to not TCP */ + u8 hlen; + + /* this should never happen, but better safe than sorry */ + if (max_len < ETH_HLEN) + return max_len; + + /* initialize network frame pointer */ + hdr.network = data; + + /* set first protocol and move network header forward */ + protocol = hdr.eth->h_proto; + hdr.network += ETH_HLEN; + + /* handle any vlan tag if present */ + if (protocol == __constant_htons(ETH_P_8021Q)) { + if ((hdr.network - data) > (max_len - VLAN_HLEN)) + return max_len; + + protocol = hdr.vlan->h_vlan_encapsulated_proto; + hdr.network += VLAN_HLEN; + } + + /* handle L3 protocols */ + if (protocol == __constant_htons(ETH_P_IP)) { + if ((hdr.network - data) > (max_len - sizeof(struct iphdr))) + return max_len; + + /* access ihl as a u8 to avoid unaligned access on ia64 */ + hlen = (hdr.network[0] & 0x0F) << 2; + + /* verify hlen meets minimum size requirements */ + if (hlen < sizeof(struct iphdr)) + return hdr.network - data; + + /* record next protocol if header is present */ + if (!(hdr.ipv4->frag_off & htons(IP_OFFSET))) + nexthdr = hdr.ipv4->protocol; +#ifdef NETIF_F_TSO6 + } else if (protocol == __constant_htons(ETH_P_IPV6)) { + if ((hdr.network - data) > (max_len - sizeof(struct ipv6hdr))) + return max_len; + + /* record next protocol */ + nexthdr = hdr.ipv6->nexthdr; + hlen = sizeof(struct ipv6hdr); +#endif /* NETIF_F_TSO6 */ + } else { + return hdr.network - data; + } + + /* relocate pointer to start of L4 header */ + hdr.network += hlen; + + /* finally sort out TCP */ + if (nexthdr == IPPROTO_TCP) { + if ((hdr.network - data) > (max_len - sizeof(struct tcphdr))) + return max_len; + + /* access doff as a u8 to avoid unaligned access on ia64 */ + hlen = (hdr.network[12] & 0xF0) >> 2; + + /* verify hlen meets minimum size requirements */ + if (hlen < sizeof(struct tcphdr)) + return hdr.network - data; + + hdr.network += hlen; + } else if (nexthdr == IPPROTO_UDP) { + if ((hdr.network - data) > (max_len - sizeof(struct udphdr))) + return max_len; + + hdr.network += sizeof(struct udphdr); + } + + /* + * If everything has gone correctly hdr.network should be the + * data section of the packet and will be the end of the header. + * If not then it probably represents the end of the last recognized + * header. + */ + if ((hdr.network - data) < max_len) + return hdr.network - data; + else + return max_len; +} + +/** + * igb_pull_tail - igb specific version of skb_pull_tail + * @rx_ring: rx descriptor ring packet is being transacted on + * @rx_desc: pointer to the EOP Rx descriptor + * @skb: pointer to current skb being adjusted + * + * This function is an igb specific version of __pskb_pull_tail. The + * main difference between this version and the original function is that + * this function can make several assumptions about the state of things + * that allow for significant optimizations versus the standard function. + * As a result we can do things like drop a frag and maintain an accurate + * truesize for the skb. + */ +static void igb_pull_tail(struct igb_ring *rx_ring, + union e1000_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + struct skb_frag_struct *frag = &skb_shinfo(skb)->frags[0]; + unsigned char *va; + unsigned int pull_len; + + /* + * it is valid to use page_address instead of kmap since we are + * working with pages allocated out of the lomem pool per + * alloc_page(GFP_ATOMIC) + */ + va = skb_frag_address(frag); + +#ifdef HAVE_PTP_1588_CLOCK + if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) { + /* retrieve timestamp from buffer */ + igb_ptp_rx_pktstamp(rx_ring->q_vector, va, skb); + + /* update pointers to remove timestamp header */ + skb_frag_size_sub(frag, IGB_TS_HDR_LEN); + frag->page_offset += IGB_TS_HDR_LEN; + skb->data_len -= IGB_TS_HDR_LEN; + skb->len -= IGB_TS_HDR_LEN; + + /* move va to start of packet data */ + va += IGB_TS_HDR_LEN; + } +#endif /* HAVE_PTP_1588_CLOCK */ + + /* + * we need the header to contain the greater of either ETH_HLEN or + * 60 bytes if the skb->len is less than 60 for skb_pad. + */ + pull_len = igb_get_headlen(va, IGB_RX_HDR_LEN); + + /* align pull length to size of long to optimize memcpy performance */ + skb_copy_to_linear_data(skb, va, ALIGN(pull_len, sizeof(long))); + + /* update all of the pointers */ + skb_frag_size_sub(frag, pull_len); + frag->page_offset += pull_len; + skb->data_len -= pull_len; + skb->tail += pull_len; +} + +/** + * igb_cleanup_headers - Correct corrupted or empty headers + * @rx_ring: rx descriptor ring packet is being transacted on + * @rx_desc: pointer to the EOP Rx descriptor + * @skb: pointer to current skb being fixed + * + * Address the case where we are pulling data in on pages only + * and as such no data is present in the skb header. + * + * In addition if skb is not at least 60 bytes we need to pad it so that + * it is large enough to qualify as a valid Ethernet frame. + * + * Returns true if an error was encountered and skb was freed. + **/ +static bool igb_cleanup_headers(struct igb_ring *rx_ring, + union e1000_adv_rx_desc *rx_desc, + struct sk_buff *skb) +{ + + if (unlikely((igb_test_staterr(rx_desc, + E1000_RXDEXT_ERR_FRAME_ERR_MASK)))) { + struct net_device *netdev = rx_ring->netdev; + if (!(netdev->features & NETIF_F_RXALL)) { + dev_kfree_skb_any(skb); + return true; + } + } + + /* place header in linear portion of buffer */ + if (skb_is_nonlinear(skb)) + igb_pull_tail(rx_ring, rx_desc, skb); + + /* if skb_pad returns an error the skb was freed */ + if (unlikely(skb->len < 60)) { + int pad_len = 60 - skb->len; + + if (skb_pad(skb, pad_len)) + return true; + __skb_put(skb, pad_len); + } + + return false; +} + +/* igb_clean_rx_irq -- * packet split */ +static bool igb_clean_rx_irq(struct igb_q_vector *q_vector, int budget) +{ + struct igb_ring *rx_ring = q_vector->rx.ring; + struct sk_buff *skb = rx_ring->skb; + unsigned int total_bytes = 0, total_packets = 0; + u16 cleaned_count = igb_desc_unused(rx_ring); + + do { + union e1000_adv_rx_desc *rx_desc; + + /* return some buffers to hardware, one at a time is too slow */ + if (cleaned_count >= IGB_RX_BUFFER_WRITE) { + igb_alloc_rx_buffers(rx_ring, cleaned_count); + cleaned_count = 0; + } + + rx_desc = IGB_RX_DESC(rx_ring, rx_ring->next_to_clean); + + if (!igb_test_staterr(rx_desc, E1000_RXD_STAT_DD)) + break; + + /* + * This memory barrier is needed to keep us from reading + * any other fields out of the rx_desc until we know the + * RXD_STAT_DD bit is set + */ + rmb(); + + /* retrieve a buffer from the ring */ + skb = igb_fetch_rx_buffer(rx_ring, rx_desc, skb); + + /* exit if we failed to retrieve a buffer */ + if (!skb) + break; + + cleaned_count++; + + /* fetch next buffer in frame if non-eop */ + if (igb_is_non_eop(rx_ring, rx_desc)) + continue; + + /* verify the packet layout is correct */ + if (igb_cleanup_headers(rx_ring, rx_desc, skb)) { + skb = NULL; + continue; + } + + /* probably a little skewed due to removing CRC */ + total_bytes += skb->len; + + /* populate checksum, timestamp, VLAN, and protocol */ + igb_process_skb_fields(rx_ring, rx_desc, skb); + +#ifndef IGB_NO_LRO + if (igb_can_lro(rx_ring, rx_desc, skb)) + igb_lro_receive(q_vector, skb); + else +#endif +#ifdef HAVE_VLAN_RX_REGISTER + igb_receive_skb(q_vector, skb); +#else + napi_gro_receive(&q_vector->napi, skb); +#endif +#ifndef NETIF_F_GRO + + netdev_ring(rx_ring)->last_rx = jiffies; +#endif + + /* reset skb pointer */ + skb = NULL; + + /* update budget accounting */ + total_packets++; + } while (likely(total_packets < budget)); + + /* place incomplete frames back on ring for completion */ + rx_ring->skb = skb; + + rx_ring->rx_stats.packets += total_packets; + rx_ring->rx_stats.bytes += total_bytes; + q_vector->rx.total_packets += total_packets; + q_vector->rx.total_bytes += total_bytes; + + if (cleaned_count) + igb_alloc_rx_buffers(rx_ring, cleaned_count); + +#ifndef IGB_NO_LRO + igb_lro_flush_all(q_vector); + +#endif /* IGB_NO_LRO */ + return total_packets < budget; +} +#endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */ + +#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT +static bool igb_alloc_mapped_skb(struct igb_ring *rx_ring, + struct igb_rx_buffer *bi) +{ + struct sk_buff *skb = bi->skb; + dma_addr_t dma = bi->dma; + + if (dma) + return true; + + if (likely(!skb)) { + skb = netdev_alloc_skb_ip_align(netdev_ring(rx_ring), + rx_ring->rx_buffer_len); + bi->skb = skb; + if (!skb) { + rx_ring->rx_stats.alloc_failed++; + return false; + } + + /* initialize skb for ring */ + skb_record_rx_queue(skb, ring_queue_index(rx_ring)); + } + + dma = dma_map_single(rx_ring->dev, skb->data, + rx_ring->rx_buffer_len, DMA_FROM_DEVICE); + + /* if mapping failed free memory back to system since + * there isn't much point in holding memory we can't use + */ + if (dma_mapping_error(rx_ring->dev, dma)) { + dev_kfree_skb_any(skb); + bi->skb = NULL; + + rx_ring->rx_stats.alloc_failed++; + return false; + } + + bi->dma = dma; + return true; +} + +#else /* CONFIG_IGB_DISABLE_PACKET_SPLIT */ +static bool igb_alloc_mapped_page(struct igb_ring *rx_ring, + struct igb_rx_buffer *bi) +{ + struct page *page = bi->page; + dma_addr_t dma; + + /* since we are recycling buffers we should seldom need to alloc */ + if (likely(page)) + return true; + + /* alloc new page for storage */ + page = alloc_page(GFP_ATOMIC | __GFP_COLD); + if (unlikely(!page)) { + rx_ring->rx_stats.alloc_failed++; + return false; + } + + /* map page for use */ + dma = dma_map_page(rx_ring->dev, page, 0, PAGE_SIZE, DMA_FROM_DEVICE); + + /* + * if mapping failed free memory back to system since + * there isn't much point in holding memory we can't use + */ + if (dma_mapping_error(rx_ring->dev, dma)) { + __free_page(page); + + rx_ring->rx_stats.alloc_failed++; + return false; + } + + bi->dma = dma; + bi->page = page; + bi->page_offset = 0; + + return true; +} + +#endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */ +/** + * igb_alloc_rx_buffers - Replace used receive buffers; packet split + * @adapter: address of board private structure + **/ +void igb_alloc_rx_buffers(struct igb_ring *rx_ring, u16 cleaned_count) +{ + union e1000_adv_rx_desc *rx_desc; + struct igb_rx_buffer *bi; + u16 i = rx_ring->next_to_use; + + /* nothing to do */ + if (!cleaned_count) + return; + + rx_desc = IGB_RX_DESC(rx_ring, i); + bi = &rx_ring->rx_buffer_info[i]; + i -= rx_ring->count; + + do { +#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT + if (!igb_alloc_mapped_skb(rx_ring, bi)) +#else + if (!igb_alloc_mapped_page(rx_ring, bi)) +#endif /* CONFIG_IGB_DISABLE_PACKET_SPLIT */ + break; + + /* + * Refresh the desc even if buffer_addrs didn't change + * because each write-back erases this info. + */ +#ifdef CONFIG_IGB_DISABLE_PACKET_SPLIT + rx_desc->read.pkt_addr = cpu_to_le64(bi->dma); +#else + rx_desc->read.pkt_addr = cpu_to_le64(bi->dma + bi->page_offset); +#endif + + rx_desc++; + bi++; + i++; + if (unlikely(!i)) { + rx_desc = IGB_RX_DESC(rx_ring, 0); + bi = rx_ring->rx_buffer_info; + i -= rx_ring->count; + } + + /* clear the hdr_addr for the next_to_use descriptor */ + rx_desc->read.hdr_addr = 0; + + cleaned_count--; + } while (cleaned_count); + + i += rx_ring->count; + + if (rx_ring->next_to_use != i) { + /* record the next descriptor to use */ + rx_ring->next_to_use = i; + +#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT + /* update next to alloc since we have filled the ring */ + rx_ring->next_to_alloc = i; + +#endif + /* + * Force memory writes to complete before letting h/w + * know there are new descriptors to fetch. (Only + * applicable for weak-ordered memory model archs, + * such as IA-64). + */ + wmb(); + writel(i, rx_ring->tail); + } +} + +#ifdef SIOCGMIIPHY +/** + * igb_mii_ioctl - + * @netdev: + * @ifreq: + * @cmd: + **/ +static int igb_mii_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct mii_ioctl_data *data = if_mii(ifr); + + if (adapter->hw.phy.media_type != e1000_media_type_copper) + return -EOPNOTSUPP; + + switch (cmd) { + case SIOCGMIIPHY: + data->phy_id = adapter->hw.phy.addr; + break; + case SIOCGMIIREG: + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + if (e1000_read_phy_reg(&adapter->hw, data->reg_num & 0x1F, + &data->val_out)) + return -EIO; + break; + case SIOCSMIIREG: + default: + return -EOPNOTSUPP; + } + return E1000_SUCCESS; +} + +#endif +/** + * igb_ioctl - + * @netdev: + * @ifreq: + * @cmd: + **/ +static int igb_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) +{ + switch (cmd) { +#ifdef SIOCGMIIPHY + case SIOCGMIIPHY: + case SIOCGMIIREG: + case SIOCSMIIREG: + return igb_mii_ioctl(netdev, ifr, cmd); +#endif +#ifdef HAVE_PTP_1588_CLOCK + case SIOCSHWTSTAMP: + return igb_ptp_hwtstamp_ioctl(netdev, ifr, cmd); +#endif /* HAVE_PTP_1588_CLOCK */ +#ifdef ETHTOOL_OPS_COMPAT + case SIOCETHTOOL: + return ethtool_ioctl(ifr); +#endif + default: + return -EOPNOTSUPP; + } +} + +s32 e1000_read_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value) +{ + struct igb_adapter *adapter = hw->back; + u16 cap_offset; + + cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP); + if (!cap_offset) + return -E1000_ERR_CONFIG; + + pci_read_config_word(adapter->pdev, cap_offset + reg, value); + + return E1000_SUCCESS; +} + +s32 e1000_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value) +{ + struct igb_adapter *adapter = hw->back; + u16 cap_offset; + + cap_offset = pci_find_capability(adapter->pdev, PCI_CAP_ID_EXP); + if (!cap_offset) + return -E1000_ERR_CONFIG; + + pci_write_config_word(adapter->pdev, cap_offset + reg, *value); + + return E1000_SUCCESS; +} + +#ifdef HAVE_VLAN_RX_REGISTER +static void igb_vlan_mode(struct net_device *netdev, struct vlan_group *vlgrp) +#else +void igb_vlan_mode(struct net_device *netdev, u32 features) +#endif +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + u32 ctrl, rctl; + int i; +#ifdef HAVE_VLAN_RX_REGISTER + bool enable = !!vlgrp; + + igb_irq_disable(adapter); + + adapter->vlgrp = vlgrp; + + if (!test_bit(__IGB_DOWN, &adapter->state)) + igb_irq_enable(adapter); +#else +#ifdef NETIF_F_HW_VLAN_CTAG_RX + bool enable = !!(features & NETIF_F_HW_VLAN_CTAG_RX); +#else + bool enable = !!(features & NETIF_F_HW_VLAN_RX); +#endif +#endif + + if (enable) { + /* enable VLAN tag insert/strip */ + ctrl = E1000_READ_REG(hw, E1000_CTRL); + ctrl |= E1000_CTRL_VME; + E1000_WRITE_REG(hw, E1000_CTRL, ctrl); + + /* Disable CFI check */ + rctl = E1000_READ_REG(hw, E1000_RCTL); + rctl &= ~E1000_RCTL_CFIEN; + E1000_WRITE_REG(hw, E1000_RCTL, rctl); + } else { + /* disable VLAN tag insert/strip */ + ctrl = E1000_READ_REG(hw, E1000_CTRL); + ctrl &= ~E1000_CTRL_VME; + E1000_WRITE_REG(hw, E1000_CTRL, ctrl); + } + +#ifndef CONFIG_IGB_VMDQ_NETDEV + for (i = 0; i < adapter->vmdq_pools; i++) { + igb_set_vf_vlan_strip(adapter, + adapter->vfs_allocated_count + i, + enable); + } + +#else + igb_set_vf_vlan_strip(adapter, + adapter->vfs_allocated_count, + enable); + + for (i = 1; i < adapter->vmdq_pools; i++) { +#ifdef HAVE_VLAN_RX_REGISTER + struct igb_vmdq_adapter *vadapter; + vadapter = netdev_priv(adapter->vmdq_netdev[i-1]); + enable = !!vadapter->vlgrp; +#else + struct net_device *vnetdev; + vnetdev = adapter->vmdq_netdev[i-1]; +#ifdef NETIF_F_HW_VLAN_CTAG_RX + enable = !!(vnetdev->features & NETIF_F_HW_VLAN_CTAG_RX); +#else + enable = !!(vnetdev->features & NETIF_F_HW_VLAN_RX); +#endif +#endif + igb_set_vf_vlan_strip(adapter, + adapter->vfs_allocated_count + i, + enable); + } + +#endif + igb_rlpml_set(adapter); +} + +#ifdef HAVE_VLAN_PROTOCOL +static int igb_vlan_rx_add_vid(struct net_device *netdev, __be16 proto, u16 vid) +#elif defined HAVE_INT_NDO_VLAN_RX_ADD_VID +#ifdef NETIF_F_HW_VLAN_CTAG_RX +static int igb_vlan_rx_add_vid(struct net_device *netdev, + __always_unused __be16 proto, u16 vid) +#else +static int igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid) +#endif +#else +static void igb_vlan_rx_add_vid(struct net_device *netdev, u16 vid) +#endif +{ + struct igb_adapter *adapter = netdev_priv(netdev); + int pf_id = adapter->vfs_allocated_count; + + /* attempt to add filter to vlvf array */ + igb_vlvf_set(adapter, vid, TRUE, pf_id); + + /* add the filter since PF can receive vlans w/o entry in vlvf */ + igb_vfta_set(adapter, vid, TRUE); +#ifndef HAVE_NETDEV_VLAN_FEATURES + + /* Copy feature flags from netdev to the vlan netdev for this vid. + * This allows things like TSO to bubble down to our vlan device. + * There is no need to update netdev for vlan 0 (DCB), since it + * wouldn't has v_netdev. + */ + if (adapter->vlgrp) { + struct vlan_group *vlgrp = adapter->vlgrp; + struct net_device *v_netdev = vlan_group_get_device(vlgrp, vid); + if (v_netdev) { + v_netdev->features |= netdev->features; + vlan_group_set_device(vlgrp, vid, v_netdev); + } + } +#endif +#ifndef HAVE_VLAN_RX_REGISTER + + set_bit(vid, adapter->active_vlans); +#endif +#ifdef HAVE_INT_NDO_VLAN_RX_ADD_VID + return 0; +#endif +} + +#ifdef HAVE_VLAN_PROTOCOL +static int igb_vlan_rx_kill_vid(struct net_device *netdev, __be16 proto, u16 vid) +#elif defined HAVE_INT_NDO_VLAN_RX_ADD_VID +#ifdef NETIF_F_HW_VLAN_CTAG_RX +static int igb_vlan_rx_kill_vid(struct net_device *netdev, + __always_unused __be16 proto, u16 vid) +#else +static int igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid) +#endif +#else +static void igb_vlan_rx_kill_vid(struct net_device *netdev, u16 vid) +#endif +{ + struct igb_adapter *adapter = netdev_priv(netdev); + int pf_id = adapter->vfs_allocated_count; + s32 err; + +#ifdef HAVE_VLAN_RX_REGISTER + igb_irq_disable(adapter); + + vlan_group_set_device(adapter->vlgrp, vid, NULL); + + if (!test_bit(__IGB_DOWN, &adapter->state)) + igb_irq_enable(adapter); + +#endif /* HAVE_VLAN_RX_REGISTER */ + /* remove vlan from VLVF table array */ + err = igb_vlvf_set(adapter, vid, FALSE, pf_id); + + /* if vid was not present in VLVF just remove it from table */ + if (err) + igb_vfta_set(adapter, vid, FALSE); +#ifndef HAVE_VLAN_RX_REGISTER + + clear_bit(vid, adapter->active_vlans); +#endif +#ifdef HAVE_INT_NDO_VLAN_RX_ADD_VID + return 0; +#endif +} + +static void igb_restore_vlan(struct igb_adapter *adapter) +{ +#ifdef HAVE_VLAN_RX_REGISTER + igb_vlan_mode(adapter->netdev, adapter->vlgrp); + + if (adapter->vlgrp) { + u16 vid; + for (vid = 0; vid < VLAN_N_VID; vid++) { + if (!vlan_group_get_device(adapter->vlgrp, vid)) + continue; +#ifdef NETIF_F_HW_VLAN_CTAG_RX + igb_vlan_rx_add_vid(adapter->netdev, + htons(ETH_P_8021Q), vid); +#else + igb_vlan_rx_add_vid(adapter->netdev, vid); +#endif + } + } +#else + u16 vid; + + igb_vlan_mode(adapter->netdev, adapter->netdev->features); + + for_each_set_bit(vid, adapter->active_vlans, VLAN_N_VID) +#ifdef NETIF_F_HW_VLAN_CTAG_RX + igb_vlan_rx_add_vid(adapter->netdev, + htons(ETH_P_8021Q), vid); +#else + igb_vlan_rx_add_vid(adapter->netdev, vid); +#endif +#endif +} + +int igb_set_spd_dplx(struct igb_adapter *adapter, u16 spddplx) +{ + struct pci_dev *pdev = adapter->pdev; + struct e1000_mac_info *mac = &adapter->hw.mac; + + mac->autoneg = 0; + + /* SerDes device's does not support 10Mbps Full/duplex + * and 100Mbps Half duplex + */ + if (adapter->hw.phy.media_type == e1000_media_type_internal_serdes) { + switch (spddplx) { + case SPEED_10 + DUPLEX_HALF: + case SPEED_10 + DUPLEX_FULL: + case SPEED_100 + DUPLEX_HALF: + dev_err(pci_dev_to_dev(pdev), + "Unsupported Speed/Duplex configuration\n"); + return -EINVAL; + default: + break; + } + } + + switch (spddplx) { + case SPEED_10 + DUPLEX_HALF: + mac->forced_speed_duplex = ADVERTISE_10_HALF; + break; + case SPEED_10 + DUPLEX_FULL: + mac->forced_speed_duplex = ADVERTISE_10_FULL; + break; + case SPEED_100 + DUPLEX_HALF: + mac->forced_speed_duplex = ADVERTISE_100_HALF; + break; + case SPEED_100 + DUPLEX_FULL: + mac->forced_speed_duplex = ADVERTISE_100_FULL; + break; + case SPEED_1000 + DUPLEX_FULL: + mac->autoneg = 1; + adapter->hw.phy.autoneg_advertised = ADVERTISE_1000_FULL; + break; + case SPEED_1000 + DUPLEX_HALF: /* not supported */ + default: + dev_err(pci_dev_to_dev(pdev), "Unsupported Speed/Duplex configuration\n"); + return -EINVAL; + } + + /* clear MDI, MDI(-X) override is only allowed when autoneg enabled */ + adapter->hw.phy.mdix = AUTO_ALL_MODES; + + return 0; +} + +static int __igb_shutdown(struct pci_dev *pdev, bool *enable_wake, + bool runtime) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + u32 ctrl, rctl, status; + u32 wufc = runtime ? E1000_WUFC_LNKC : adapter->wol; +#ifdef CONFIG_PM + int retval = 0; +#endif + + netif_device_detach(netdev); + + status = E1000_READ_REG(hw, E1000_STATUS); + if (status & E1000_STATUS_LU) + wufc &= ~E1000_WUFC_LNKC; + + if (netif_running(netdev)) + __igb_close(netdev, true); + + igb_clear_interrupt_scheme(adapter); + +#ifdef CONFIG_PM + retval = pci_save_state(pdev); + if (retval) + return retval; +#endif + + if (wufc) { + igb_setup_rctl(adapter); + igb_set_rx_mode(netdev); + + /* turn on all-multi mode if wake on multicast is enabled */ + if (wufc & E1000_WUFC_MC) { + rctl = E1000_READ_REG(hw, E1000_RCTL); + rctl |= E1000_RCTL_MPE; + E1000_WRITE_REG(hw, E1000_RCTL, rctl); + } + + ctrl = E1000_READ_REG(hw, E1000_CTRL); + /* phy power management enable */ + #define E1000_CTRL_EN_PHY_PWR_MGMT 0x00200000 + ctrl |= E1000_CTRL_ADVD3WUC; + E1000_WRITE_REG(hw, E1000_CTRL, ctrl); + + /* Allow time for pending master requests to run */ + e1000_disable_pcie_master(hw); + + E1000_WRITE_REG(hw, E1000_WUC, E1000_WUC_PME_EN); + E1000_WRITE_REG(hw, E1000_WUFC, wufc); + } else { + E1000_WRITE_REG(hw, E1000_WUC, 0); + E1000_WRITE_REG(hw, E1000_WUFC, 0); + } + + *enable_wake = wufc || adapter->en_mng_pt; + if (!*enable_wake) + igb_power_down_link(adapter); + else + igb_power_up_link(adapter); + + /* Release control of h/w to f/w. If f/w is AMT enabled, this + * would have already happened in close and is redundant. */ + igb_release_hw_control(adapter); + + pci_disable_device(pdev); + + return 0; +} + +#ifdef CONFIG_PM +#ifdef HAVE_SYSTEM_SLEEP_PM_OPS +static int igb_suspend(struct device *dev) +#else +static int igb_suspend(struct pci_dev *pdev, pm_message_t state) +#endif /* HAVE_SYSTEM_SLEEP_PM_OPS */ +{ +#ifdef HAVE_SYSTEM_SLEEP_PM_OPS + struct pci_dev *pdev = to_pci_dev(dev); +#endif /* HAVE_SYSTEM_SLEEP_PM_OPS */ + int retval; + bool wake; + + retval = __igb_shutdown(pdev, &wake, 0); + if (retval) + return retval; + + if (wake) { + pci_prepare_to_sleep(pdev); + } else { + pci_wake_from_d3(pdev, false); + pci_set_power_state(pdev, PCI_D3hot); + } + + return 0; +} + +#ifdef HAVE_SYSTEM_SLEEP_PM_OPS +static int igb_resume(struct device *dev) +#else +static int igb_resume(struct pci_dev *pdev) +#endif /* HAVE_SYSTEM_SLEEP_PM_OPS */ +{ +#ifdef HAVE_SYSTEM_SLEEP_PM_OPS + struct pci_dev *pdev = to_pci_dev(dev); +#endif /* HAVE_SYSTEM_SLEEP_PM_OPS */ + struct net_device *netdev = pci_get_drvdata(pdev); + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + u32 err; + + pci_set_power_state(pdev, PCI_D0); + pci_restore_state(pdev); + pci_save_state(pdev); + + err = pci_enable_device_mem(pdev); + if (err) { + dev_err(pci_dev_to_dev(pdev), + "igb: Cannot enable PCI device from suspend\n"); + return err; + } + pci_set_master(pdev); + + pci_enable_wake(pdev, PCI_D3hot, 0); + pci_enable_wake(pdev, PCI_D3cold, 0); + + if (igb_init_interrupt_scheme(adapter, true)) { + dev_err(pci_dev_to_dev(pdev), "Unable to allocate memory for queues\n"); + return -ENOMEM; + } + + igb_reset(adapter); + + /* let the f/w know that the h/w is now under the control of the + * driver. */ + igb_get_hw_control(adapter); + + E1000_WRITE_REG(hw, E1000_WUS, ~0); + + if (netdev->flags & IFF_UP) { + rtnl_lock(); + err = __igb_open(netdev, true); + rtnl_unlock(); + if (err) + return err; + } + + netif_device_attach(netdev); + + return 0; +} + +#ifdef CONFIG_PM_RUNTIME +#ifdef HAVE_SYSTEM_SLEEP_PM_OPS +static int igb_runtime_idle(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + struct net_device *netdev = pci_get_drvdata(pdev); + struct igb_adapter *adapter = netdev_priv(netdev); + + if (!igb_has_link(adapter)) + pm_schedule_suspend(dev, MSEC_PER_SEC * 5); + + return -EBUSY; +} + +static int igb_runtime_suspend(struct device *dev) +{ + struct pci_dev *pdev = to_pci_dev(dev); + int retval; + bool wake; + + retval = __igb_shutdown(pdev, &wake, 1); + if (retval) + return retval; + + if (wake) { + pci_prepare_to_sleep(pdev); + } else { + pci_wake_from_d3(pdev, false); + pci_set_power_state(pdev, PCI_D3hot); + } + + return 0; +} + +static int igb_runtime_resume(struct device *dev) +{ + return igb_resume(dev); +} +#endif /* HAVE_SYSTEM_SLEEP_PM_OPS */ +#endif /* CONFIG_PM_RUNTIME */ +#endif /* CONFIG_PM */ + +#ifdef USE_REBOOT_NOTIFIER +/* only want to do this for 2.4 kernels? */ +static int igb_notify_reboot(struct notifier_block *nb, unsigned long event, + void *p) +{ + struct pci_dev *pdev = NULL; + bool wake; + + switch (event) { + case SYS_DOWN: + case SYS_HALT: + case SYS_POWER_OFF: + while ((pdev = pci_find_device(PCI_ANY_ID, PCI_ANY_ID, pdev))) { + if (pci_dev_driver(pdev) == &igb_driver) { + __igb_shutdown(pdev, &wake, 0); + if (event == SYS_POWER_OFF) { + pci_wake_from_d3(pdev, wake); + pci_set_power_state(pdev, PCI_D3hot); + } + } + } + } + return NOTIFY_DONE; +} +#else +static void igb_shutdown(struct pci_dev *pdev) +{ + bool wake = false; + + __igb_shutdown(pdev, &wake, 0); + + if (system_state == SYSTEM_POWER_OFF) { + pci_wake_from_d3(pdev, wake); + pci_set_power_state(pdev, PCI_D3hot); + } +} +#endif /* USE_REBOOT_NOTIFIER */ + +#ifdef CONFIG_NET_POLL_CONTROLLER +/* + * Polling 'interrupt' - used by things like netconsole to send skbs + * without having to re-enable interrupts. It's not called while + * the interrupt routine is executing. + */ +static void igb_netpoll(struct net_device *netdev) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + struct igb_q_vector *q_vector; + int i; + + for (i = 0; i < adapter->num_q_vectors; i++) { + q_vector = adapter->q_vector[i]; + if (adapter->msix_entries) + E1000_WRITE_REG(hw, E1000_EIMC, q_vector->eims_value); + else + igb_irq_disable(adapter); + napi_schedule(&q_vector->napi); + } +} +#endif /* CONFIG_NET_POLL_CONTROLLER */ + +#ifdef HAVE_PCI_ERS +#define E1000_DEV_ID_82576_VF 0x10CA +/** + * igb_io_error_detected - called when PCI error is detected + * @pdev: Pointer to PCI device + * @state: The current pci connection state + * + * This function is called after a PCI bus error affecting + * this device has been detected. + */ +static pci_ers_result_t igb_io_error_detected(struct pci_dev *pdev, + pci_channel_state_t state) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct igb_adapter *adapter = netdev_priv(netdev); + +#ifdef CONFIG_PCI_IOV__UNUSED + struct pci_dev *bdev, *vfdev; + u32 dw0, dw1, dw2, dw3; + int vf, pos; + u16 req_id, pf_func; + + if (!(adapter->flags & IGB_FLAG_DETECT_BAD_DMA)) + goto skip_bad_vf_detection; + + bdev = pdev->bus->self; + while (bdev && (pci_pcie_type(bdev) != PCI_EXP_TYPE_ROOT_PORT)) + bdev = bdev->bus->self; + + if (!bdev) + goto skip_bad_vf_detection; + + pos = pci_find_ext_capability(bdev, PCI_EXT_CAP_ID_ERR); + if (!pos) + goto skip_bad_vf_detection; + + pci_read_config_dword(bdev, pos + PCI_ERR_HEADER_LOG, &dw0); + pci_read_config_dword(bdev, pos + PCI_ERR_HEADER_LOG + 4, &dw1); + pci_read_config_dword(bdev, pos + PCI_ERR_HEADER_LOG + 8, &dw2); + pci_read_config_dword(bdev, pos + PCI_ERR_HEADER_LOG + 12, &dw3); + + req_id = dw1 >> 16; + /* On the 82576 if bit 7 of the requestor ID is set then it's a VF */ + if (!(req_id & 0x0080)) + goto skip_bad_vf_detection; + + pf_func = req_id & 0x01; + if ((pf_func & 1) == (pdev->devfn & 1)) { + + vf = (req_id & 0x7F) >> 1; + dev_err(pci_dev_to_dev(pdev), + "VF %d has caused a PCIe error\n", vf); + dev_err(pci_dev_to_dev(pdev), + "TLP: dw0: %8.8x\tdw1: %8.8x\tdw2: " + "%8.8x\tdw3: %8.8x\n", + dw0, dw1, dw2, dw3); + + /* Find the pci device of the offending VF */ + vfdev = pci_get_device(PCI_VENDOR_ID_INTEL, + E1000_DEV_ID_82576_VF, NULL); + while (vfdev) { + if (vfdev->devfn == (req_id & 0xFF)) + break; + vfdev = pci_get_device(PCI_VENDOR_ID_INTEL, + E1000_DEV_ID_82576_VF, vfdev); + } + /* + * There's a slim chance the VF could have been hot plugged, + * so if it is no longer present we don't need to issue the + * VFLR. Just clean up the AER in that case. + */ + if (vfdev) { + dev_err(pci_dev_to_dev(pdev), + "Issuing VFLR to VF %d\n", vf); + pci_write_config_dword(vfdev, 0xA8, 0x00008000); + } + + pci_cleanup_aer_uncorrect_error_status(pdev); + } + + /* + * Even though the error may have occurred on the other port + * we still need to increment the vf error reference count for + * both ports because the I/O resume function will be called + * for both of them. + */ + adapter->vferr_refcount++; + + return PCI_ERS_RESULT_RECOVERED; + +skip_bad_vf_detection: +#endif /* CONFIG_PCI_IOV */ + + netif_device_detach(netdev); + + if (state == pci_channel_io_perm_failure) + return PCI_ERS_RESULT_DISCONNECT; + + if (netif_running(netdev)) + igb_down(adapter); + pci_disable_device(pdev); + + /* Request a slot slot reset. */ + return PCI_ERS_RESULT_NEED_RESET; +} + +/** + * igb_io_slot_reset - called after the pci bus has been reset. + * @pdev: Pointer to PCI device + * + * Restart the card from scratch, as if from a cold-boot. Implementation + * resembles the first-half of the igb_resume routine. + */ +static pci_ers_result_t igb_io_slot_reset(struct pci_dev *pdev) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + pci_ers_result_t result; + + if (pci_enable_device_mem(pdev)) { + dev_err(pci_dev_to_dev(pdev), + "Cannot re-enable PCI device after reset.\n"); + result = PCI_ERS_RESULT_DISCONNECT; + } else { + pci_set_master(pdev); + pci_restore_state(pdev); + pci_save_state(pdev); + + pci_enable_wake(pdev, PCI_D3hot, 0); + pci_enable_wake(pdev, PCI_D3cold, 0); + + schedule_work(&adapter->reset_task); + E1000_WRITE_REG(hw, E1000_WUS, ~0); + result = PCI_ERS_RESULT_RECOVERED; + } + + pci_cleanup_aer_uncorrect_error_status(pdev); + + return result; +} + +/** + * igb_io_resume - called when traffic can start flowing again. + * @pdev: Pointer to PCI device + * + * This callback is called when the error recovery driver tells us that + * its OK to resume normal operation. Implementation resembles the + * second-half of the igb_resume routine. + */ +static void igb_io_resume(struct pci_dev *pdev) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct igb_adapter *adapter = netdev_priv(netdev); + + if (adapter->vferr_refcount) { + dev_info(pci_dev_to_dev(pdev), "Resuming after VF err\n"); + adapter->vferr_refcount--; + return; + } + + if (netif_running(netdev)) { + if (igb_up(adapter)) { + dev_err(pci_dev_to_dev(pdev), "igb_up failed after reset\n"); + return; + } + } + + netif_device_attach(netdev); + + /* let the f/w know that the h/w is now under the control of the + * driver. */ + igb_get_hw_control(adapter); +} + +#endif /* HAVE_PCI_ERS */ + +int igb_add_mac_filter(struct igb_adapter *adapter, u8 *addr, u16 queue) +{ + struct e1000_hw *hw = &adapter->hw; + int i; + + if (is_zero_ether_addr(addr)) + return 0; + + for (i = 0; i < hw->mac.rar_entry_count; i++) { + if (adapter->mac_table[i].state & IGB_MAC_STATE_IN_USE) + continue; + adapter->mac_table[i].state = (IGB_MAC_STATE_MODIFIED | + IGB_MAC_STATE_IN_USE); + memcpy(adapter->mac_table[i].addr, addr, ETH_ALEN); + adapter->mac_table[i].queue = queue; + igb_sync_mac_table(adapter); + return 0; + } + return -ENOMEM; +} +int igb_del_mac_filter(struct igb_adapter *adapter, u8* addr, u16 queue) +{ + /* search table for addr, if found, set to 0 and sync */ + int i; + struct e1000_hw *hw = &adapter->hw; + + if (is_zero_ether_addr(addr)) + return 0; + for (i = 0; i < hw->mac.rar_entry_count; i++) { + if (ether_addr_equal(addr, adapter->mac_table[i].addr) && + adapter->mac_table[i].queue == queue) { + adapter->mac_table[i].state = IGB_MAC_STATE_MODIFIED; + memset(adapter->mac_table[i].addr, 0, ETH_ALEN); + adapter->mac_table[i].queue = 0; + igb_sync_mac_table(adapter); + return 0; + } + } + return -ENOMEM; +} +static int igb_set_vf_mac(struct igb_adapter *adapter, + int vf, unsigned char *mac_addr) +{ + igb_del_mac_filter(adapter, adapter->vf_data[vf].vf_mac_addresses, vf); + memcpy(adapter->vf_data[vf].vf_mac_addresses, mac_addr, ETH_ALEN); + + igb_add_mac_filter(adapter, mac_addr, vf); + + return 0; +} + +#ifdef IFLA_VF_MAX +static int igb_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + if (!is_valid_ether_addr(mac) || (vf >= adapter->vfs_allocated_count)) + return -EINVAL; + adapter->vf_data[vf].flags |= IGB_VF_FLAG_PF_SET_MAC; + dev_info(&adapter->pdev->dev, "setting MAC %pM on VF %d\n", mac, vf); + dev_info(&adapter->pdev->dev, "Reload the VF driver to make this" + " change effective.\n"); + if (test_bit(__IGB_DOWN, &adapter->state)) { + dev_warn(&adapter->pdev->dev, "The VF MAC address has been set," + " but the PF device is not up.\n"); + dev_warn(&adapter->pdev->dev, "Bring the PF device up before" + " attempting to use the VF device.\n"); + } + return igb_set_vf_mac(adapter, vf, mac); +} + +static int igb_link_mbps(int internal_link_speed) +{ + switch (internal_link_speed) { + case SPEED_100: + return 100; + case SPEED_1000: + return 1000; + case SPEED_2500: + return 2500; + default: + return 0; + } +} + +static void igb_set_vf_rate_limit(struct e1000_hw *hw, int vf, int tx_rate, + int link_speed) +{ + int rf_dec, rf_int; + u32 bcnrc_val; + + if (tx_rate != 0) { + /* Calculate the rate factor values to set */ + rf_int = link_speed / tx_rate; + rf_dec = (link_speed - (rf_int * tx_rate)); + rf_dec = (rf_dec * (1<vf_rate_link_speed == 0) || + (adapter->hw.mac.type != e1000_82576)) + return; + + actual_link_speed = igb_link_mbps(adapter->link_speed); + if (actual_link_speed != adapter->vf_rate_link_speed) { + reset_rate = true; + adapter->vf_rate_link_speed = 0; + dev_info(&adapter->pdev->dev, + "Link speed has been changed. VF Transmit rate is disabled\n"); + } + + for (i = 0; i < adapter->vfs_allocated_count; i++) { + if (reset_rate) + adapter->vf_data[i].tx_rate = 0; + + igb_set_vf_rate_limit(&adapter->hw, i, + adapter->vf_data[i].tx_rate, actual_link_speed); + } +} + +#ifdef HAVE_VF_MIN_MAX_TXRATE +static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int min_tx_rate, + int tx_rate) +#else /* HAVE_VF_MIN_MAX_TXRATE */ +static int igb_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate) +#endif /* HAVE_VF_MIN_MAX_TXRATE */ +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + int actual_link_speed; + + if (hw->mac.type != e1000_82576) + return -EOPNOTSUPP; + +#ifdef HAVE_VF_MIN_MAX_TXRATE + if (min_tx_rate) + return -EINVAL; +#endif /* HAVE_VF_MIN_MAX_TXRATE */ + + actual_link_speed = igb_link_mbps(adapter->link_speed); + if ((vf >= adapter->vfs_allocated_count) || + (!(E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_LU)) || + (tx_rate < 0) || (tx_rate > actual_link_speed)) + return -EINVAL; + + adapter->vf_rate_link_speed = actual_link_speed; + adapter->vf_data[vf].tx_rate = (u16)tx_rate; + igb_set_vf_rate_limit(hw, vf, tx_rate, actual_link_speed); + + return 0; +} + +static int igb_ndo_get_vf_config(struct net_device *netdev, + int vf, struct ifla_vf_info *ivi) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + if (vf >= adapter->vfs_allocated_count) + return -EINVAL; + ivi->vf = vf; + memcpy(&ivi->mac, adapter->vf_data[vf].vf_mac_addresses, ETH_ALEN); +#ifdef HAVE_VF_MIN_MAX_TXRATE + ivi->max_tx_rate = adapter->vf_data[vf].tx_rate; + ivi->min_tx_rate = 0; +#else /* HAVE_VF_MIN_MAX_TXRATE */ + ivi->tx_rate = adapter->vf_data[vf].tx_rate; +#endif /* HAVE_VF_MIN_MAX_TXRATE */ + ivi->vlan = adapter->vf_data[vf].pf_vlan; + ivi->qos = adapter->vf_data[vf].pf_qos; +#ifdef HAVE_VF_SPOOFCHK_CONFIGURE + ivi->spoofchk = adapter->vf_data[vf].spoofchk_enabled; +#endif + return 0; +} +#endif +static void igb_vmm_control(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + int count; + u32 reg; + + switch (hw->mac.type) { + case e1000_82575: + default: + /* replication is not supported for 82575 */ + return; + case e1000_82576: + /* notify HW that the MAC is adding vlan tags */ + reg = E1000_READ_REG(hw, E1000_DTXCTL); + reg |= (E1000_DTXCTL_VLAN_ADDED | + E1000_DTXCTL_SPOOF_INT); + E1000_WRITE_REG(hw, E1000_DTXCTL, reg); + case e1000_82580: + /* enable replication vlan tag stripping */ + reg = E1000_READ_REG(hw, E1000_RPLOLR); + reg |= E1000_RPLOLR_STRVLAN; + E1000_WRITE_REG(hw, E1000_RPLOLR, reg); + case e1000_i350: + case e1000_i354: + /* none of the above registers are supported by i350 */ + break; + } + + /* Enable Malicious Driver Detection */ + if ((adapter->vfs_allocated_count) && + (adapter->mdd)) { + if (hw->mac.type == e1000_i350) + igb_enable_mdd(adapter); + } + + /* enable replication and loopback support */ + count = adapter->vfs_allocated_count || adapter->vmdq_pools; + if (adapter->flags & IGB_FLAG_LOOPBACK_ENABLE && count) + e1000_vmdq_set_loopback_pf(hw, 1); + e1000_vmdq_set_anti_spoofing_pf(hw, + adapter->vfs_allocated_count || adapter->vmdq_pools, + adapter->vfs_allocated_count); + e1000_vmdq_set_replication_pf(hw, adapter->vfs_allocated_count || + adapter->vmdq_pools); +} + +static void igb_init_fw(struct igb_adapter *adapter) +{ + struct e1000_fw_drv_info fw_cmd; + struct e1000_hw *hw = &adapter->hw; + int i; + u16 mask; + + if (hw->mac.type == e1000_i210) + mask = E1000_SWFW_EEP_SM; + else + mask = E1000_SWFW_PHY0_SM; + /* i211 parts do not support this feature */ + if (hw->mac.type == e1000_i211) + hw->mac.arc_subsystem_valid = false; + + if (!hw->mac.ops.acquire_swfw_sync(hw, mask)) { + for (i = 0; i <= FW_MAX_RETRIES; i++) { + E1000_WRITE_REG(hw, E1000_FWSTS, E1000_FWSTS_FWRI); + fw_cmd.hdr.cmd = FW_CMD_DRV_INFO; + fw_cmd.hdr.buf_len = FW_CMD_DRV_INFO_LEN; + fw_cmd.hdr.cmd_or_resp.cmd_resv = FW_CMD_RESERVED; + fw_cmd.port_num = hw->bus.func; + fw_cmd.drv_version = FW_FAMILY_DRV_VER; + fw_cmd.hdr.checksum = 0; + fw_cmd.hdr.checksum = e1000_calculate_checksum((u8 *)&fw_cmd, + (FW_HDR_LEN + + fw_cmd.hdr.buf_len)); + e1000_host_interface_command(hw, (u8*)&fw_cmd, + sizeof(fw_cmd)); + if (fw_cmd.hdr.cmd_or_resp.ret_status == FW_STATUS_SUCCESS) + break; + } + } else + dev_warn(pci_dev_to_dev(adapter->pdev), + "Unable to get semaphore, firmware init failed.\n"); + hw->mac.ops.release_swfw_sync(hw, mask); +} + +static void igb_init_dmac(struct igb_adapter *adapter, u32 pba) +{ + struct e1000_hw *hw = &adapter->hw; + u32 dmac_thr; + u16 hwm; + u32 status; + + if (hw->mac.type == e1000_i211) + return; + + if (hw->mac.type > e1000_82580) { + if (adapter->dmac != IGB_DMAC_DISABLE) { + u32 reg; + + /* force threshold to 0. */ + E1000_WRITE_REG(hw, E1000_DMCTXTH, 0); + + /* + * DMA Coalescing high water mark needs to be greater + * than the Rx threshold. Set hwm to PBA - max frame + * size in 16B units, capping it at PBA - 6KB. + */ + hwm = 64 * pba - adapter->max_frame_size / 16; + if (hwm < 64 * (pba - 6)) + hwm = 64 * (pba - 6); + reg = E1000_READ_REG(hw, E1000_FCRTC); + reg &= ~E1000_FCRTC_RTH_COAL_MASK; + reg |= ((hwm << E1000_FCRTC_RTH_COAL_SHIFT) + & E1000_FCRTC_RTH_COAL_MASK); + E1000_WRITE_REG(hw, E1000_FCRTC, reg); + + /* + * Set the DMA Coalescing Rx threshold to PBA - 2 * max + * frame size, capping it at PBA - 10KB. + */ + dmac_thr = pba - adapter->max_frame_size / 512; + if (dmac_thr < pba - 10) + dmac_thr = pba - 10; + reg = E1000_READ_REG(hw, E1000_DMACR); + reg &= ~E1000_DMACR_DMACTHR_MASK; + reg |= ((dmac_thr << E1000_DMACR_DMACTHR_SHIFT) + & E1000_DMACR_DMACTHR_MASK); + + /* transition to L0x or L1 if available..*/ + reg |= (E1000_DMACR_DMAC_EN | E1000_DMACR_DMAC_LX_MASK); + + /* Check if status is 2.5Gb backplane connection + * before configuration of watchdog timer, which is + * in msec values in 12.8usec intervals + * watchdog timer= msec values in 32usec intervals + * for non 2.5Gb connection + */ + if (hw->mac.type == e1000_i354) { + status = E1000_READ_REG(hw, E1000_STATUS); + if ((status & E1000_STATUS_2P5_SKU) && + (!(status & E1000_STATUS_2P5_SKU_OVER))) + reg |= ((adapter->dmac * 5) >> 6); + else + reg |= ((adapter->dmac) >> 5); + } else { + reg |= ((adapter->dmac) >> 5); + } + + /* + * Disable BMC-to-OS Watchdog enable + * on devices that support OS-to-BMC + */ + if (hw->mac.type != e1000_i354) + reg &= ~E1000_DMACR_DC_BMC2OSW_EN; + E1000_WRITE_REG(hw, E1000_DMACR, reg); + + /* no lower threshold to disable coalescing(smart fifb)-UTRESH=0*/ + E1000_WRITE_REG(hw, E1000_DMCRTRH, 0); + + /* This sets the time to wait before requesting + * transition to low power state to number of usecs + * needed to receive 1 512 byte frame at gigabit + * line rate. On i350 device, time to make transition + * to Lx state is delayed by 4 usec with flush disable + * bit set to avoid losing mailbox interrupts + */ + reg = E1000_READ_REG(hw, E1000_DMCTLX); + if (hw->mac.type == e1000_i350) + reg |= IGB_DMCTLX_DCFLUSH_DIS; + + /* in 2.5Gb connection, TTLX unit is 0.4 usec + * which is 0x4*2 = 0xA. But delay is still 4 usec + */ + if (hw->mac.type == e1000_i354) { + status = E1000_READ_REG(hw, E1000_STATUS); + if ((status & E1000_STATUS_2P5_SKU) && + (!(status & E1000_STATUS_2P5_SKU_OVER))) + reg |= 0xA; + else + reg |= 0x4; + } else { + reg |= 0x4; + } + E1000_WRITE_REG(hw, E1000_DMCTLX, reg); + + /* free space in tx packet buffer to wake from DMA coal */ + E1000_WRITE_REG(hw, E1000_DMCTXTH, (IGB_MIN_TXPBSIZE - + (IGB_TX_BUF_4096 + adapter->max_frame_size)) >> 6); + + /* make low power state decision controlled by DMA coal */ + reg = E1000_READ_REG(hw, E1000_PCIEMISC); + reg &= ~E1000_PCIEMISC_LX_DECISION; + E1000_WRITE_REG(hw, E1000_PCIEMISC, reg); + } /* endif adapter->dmac is not disabled */ + } else if (hw->mac.type == e1000_82580) { + u32 reg = E1000_READ_REG(hw, E1000_PCIEMISC); + E1000_WRITE_REG(hw, E1000_PCIEMISC, + reg & ~E1000_PCIEMISC_LX_DECISION); + E1000_WRITE_REG(hw, E1000_DMACR, 0); + } +} + +#ifdef HAVE_I2C_SUPPORT +/* igb_read_i2c_byte - Reads 8 bit word over I2C + * @hw: pointer to hardware structure + * @byte_offset: byte offset to read + * @dev_addr: device address + * @data: value read + * + * Performs byte read operation over I2C interface at + * a specified device address. + */ +s32 igb_read_i2c_byte(struct e1000_hw *hw, u8 byte_offset, + u8 dev_addr, u8 *data) +{ + struct igb_adapter *adapter = container_of(hw, struct igb_adapter, hw); + struct i2c_client *this_client = adapter->i2c_client; + s32 status; + u16 swfw_mask = 0; + + if (!this_client) + return E1000_ERR_I2C; + + swfw_mask = E1000_SWFW_PHY0_SM; + + if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask) + != E1000_SUCCESS) + return E1000_ERR_SWFW_SYNC; + + status = i2c_smbus_read_byte_data(this_client, byte_offset); + hw->mac.ops.release_swfw_sync(hw, swfw_mask); + + if (status < 0) + return E1000_ERR_I2C; + else { + *data = status; + return E1000_SUCCESS; + } +} + +/* igb_write_i2c_byte - Writes 8 bit word over I2C + * @hw: pointer to hardware structure + * @byte_offset: byte offset to write + * @dev_addr: device address + * @data: value to write + * + * Performs byte write operation over I2C interface at + * a specified device address. + */ +s32 igb_write_i2c_byte(struct e1000_hw *hw, u8 byte_offset, + u8 dev_addr, u8 data) +{ + struct igb_adapter *adapter = container_of(hw, struct igb_adapter, hw); + struct i2c_client *this_client = adapter->i2c_client; + s32 status; + u16 swfw_mask = E1000_SWFW_PHY0_SM; + + if (!this_client) + return E1000_ERR_I2C; + + if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask) != E1000_SUCCESS) + return E1000_ERR_SWFW_SYNC; + status = i2c_smbus_write_byte_data(this_client, byte_offset, data); + hw->mac.ops.release_swfw_sync(hw, swfw_mask); + + if (status) + return E1000_ERR_I2C; + else + return E1000_SUCCESS; +} +#endif /* HAVE_I2C_SUPPORT */ +/* igb_main.c */ + + +/** + * igb_probe - Device Initialization Routine + * @pdev: PCI device information struct + * @ent: entry in igb_pci_tbl + * + * Returns 0 on success, negative on failure + * + * igb_probe initializes an adapter identified by a pci_dev structure. + * The OS initialization, configuring of the adapter private structure, + * and a hardware reset occur. + **/ +int igb_kni_probe(struct pci_dev *pdev, + struct net_device **lad_dev) +{ + struct net_device *netdev; + struct igb_adapter *adapter; + struct e1000_hw *hw; + u16 eeprom_data = 0; + u8 pba_str[E1000_PBANUM_LENGTH]; + s32 ret_val; + static int global_quad_port_a; /* global quad port a indication */ + int i, err, pci_using_dac = 0; + static int cards_found; + + err = pci_enable_device_mem(pdev); + if (err) + return err; + +#ifdef NO_KNI + pci_using_dac = 0; + err = dma_set_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(64)); + if (!err) { + err = dma_set_coherent_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(64)); + if (!err) + pci_using_dac = 1; + } else { + err = dma_set_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(32)); + if (err) { + err = dma_set_coherent_mask(pci_dev_to_dev(pdev), DMA_BIT_MASK(32)); + if (err) { + IGB_ERR("No usable DMA configuration, " + "aborting\n"); + goto err_dma; + } + } + } + +#ifndef HAVE_ASPM_QUIRKS + /* 82575 requires that the pci-e link partner disable the L0s state */ + switch (pdev->device) { + case E1000_DEV_ID_82575EB_COPPER: + case E1000_DEV_ID_82575EB_FIBER_SERDES: + case E1000_DEV_ID_82575GB_QUAD_COPPER: + pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S); + default: + break; + } + +#endif /* HAVE_ASPM_QUIRKS */ + err = pci_request_selected_regions(pdev, + pci_select_bars(pdev, + IORESOURCE_MEM), + igb_driver_name); + if (err) + goto err_pci_reg; + + pci_enable_pcie_error_reporting(pdev); + + pci_set_master(pdev); + + err = -ENOMEM; +#endif /* NO_KNI */ +#ifdef HAVE_TX_MQ + netdev = alloc_etherdev_mq(sizeof(struct igb_adapter), + IGB_MAX_TX_QUEUES); +#else + netdev = alloc_etherdev(sizeof(struct igb_adapter)); +#endif /* HAVE_TX_MQ */ + if (!netdev) + goto err_alloc_etherdev; + + SET_MODULE_OWNER(netdev); + SET_NETDEV_DEV(netdev, &pdev->dev); + + //pci_set_drvdata(pdev, netdev); + adapter = netdev_priv(netdev); + adapter->netdev = netdev; + adapter->pdev = pdev; + hw = &adapter->hw; + hw->back = adapter; + adapter->port_num = hw->bus.func; + adapter->msg_enable = (1 << debug) - 1; + +#ifdef HAVE_PCI_ERS + err = pci_save_state(pdev); + if (err) + goto err_ioremap; +#endif + err = -EIO; + hw->hw_addr = ioremap(pci_resource_start(pdev, 0), + pci_resource_len(pdev, 0)); + if (!hw->hw_addr) + goto err_ioremap; + +#ifdef HAVE_NET_DEVICE_OPS + netdev->netdev_ops = &igb_netdev_ops; +#else /* HAVE_NET_DEVICE_OPS */ + netdev->open = &igb_open; + netdev->stop = &igb_close; + netdev->get_stats = &igb_get_stats; +#ifdef HAVE_SET_RX_MODE + netdev->set_rx_mode = &igb_set_rx_mode; +#endif + netdev->set_multicast_list = &igb_set_rx_mode; + netdev->set_mac_address = &igb_set_mac; + netdev->change_mtu = &igb_change_mtu; + netdev->do_ioctl = &igb_ioctl; +#ifdef HAVE_TX_TIMEOUT + netdev->tx_timeout = &igb_tx_timeout; +#endif + netdev->vlan_rx_register = igb_vlan_mode; + netdev->vlan_rx_add_vid = igb_vlan_rx_add_vid; + netdev->vlan_rx_kill_vid = igb_vlan_rx_kill_vid; +#ifdef CONFIG_NET_POLL_CONTROLLER + netdev->poll_controller = igb_netpoll; +#endif + netdev->hard_start_xmit = &igb_xmit_frame; +#endif /* HAVE_NET_DEVICE_OPS */ + igb_set_ethtool_ops(netdev); +#ifdef HAVE_TX_TIMEOUT + netdev->watchdog_timeo = 5 * HZ; +#endif + + strncpy(netdev->name, pci_name(pdev), sizeof(netdev->name) - 1); + + adapter->bd_number = cards_found; + + /* setup the private structure */ + err = igb_sw_init(adapter); + if (err) + goto err_sw_init; + + e1000_get_bus_info(hw); + + hw->phy.autoneg_wait_to_complete = FALSE; + hw->mac.adaptive_ifs = FALSE; + + /* Copper options */ + if (hw->phy.media_type == e1000_media_type_copper) { + hw->phy.mdix = AUTO_ALL_MODES; + hw->phy.disable_polarity_correction = FALSE; + hw->phy.ms_type = e1000_ms_hw_default; + } + + if (e1000_check_reset_block(hw)) + dev_info(pci_dev_to_dev(pdev), + "PHY reset is blocked due to SOL/IDER session.\n"); + + /* + * features is initialized to 0 in allocation, it might have bits + * set by igb_sw_init so we should use an or instead of an + * assignment. + */ + netdev->features |= NETIF_F_SG | + NETIF_F_IP_CSUM | +#ifdef NETIF_F_IPV6_CSUM + NETIF_F_IPV6_CSUM | +#endif +#ifdef NETIF_F_TSO + NETIF_F_TSO | +#ifdef NETIF_F_TSO6 + NETIF_F_TSO6 | +#endif +#endif /* NETIF_F_TSO */ +#ifdef NETIF_F_RXHASH + NETIF_F_RXHASH | +#endif + NETIF_F_RXCSUM | +#ifdef NETIF_F_HW_VLAN_CTAG_RX + NETIF_F_HW_VLAN_CTAG_RX | + NETIF_F_HW_VLAN_CTAG_TX; +#else + NETIF_F_HW_VLAN_RX | + NETIF_F_HW_VLAN_TX; +#endif + + if (hw->mac.type >= e1000_82576) + netdev->features |= NETIF_F_SCTP_CSUM; + +#ifdef HAVE_NDO_SET_FEATURES + /* copy netdev features into list of user selectable features */ + netdev->hw_features |= netdev->features; +#ifndef IGB_NO_LRO + + /* give us the option of enabling LRO later */ + netdev->hw_features |= NETIF_F_LRO; +#endif +#else +#ifdef NETIF_F_GRO + + /* this is only needed on kernels prior to 2.6.39 */ + netdev->features |= NETIF_F_GRO; +#endif +#endif + + /* set this bit last since it cannot be part of hw_features */ +#ifdef NETIF_F_HW_VLAN_CTAG_FILTER + netdev->features |= NETIF_F_HW_VLAN_CTAG_FILTER; +#else + netdev->features |= NETIF_F_HW_VLAN_FILTER; +#endif + +#ifdef HAVE_NETDEV_VLAN_FEATURES + netdev->vlan_features |= NETIF_F_TSO | + NETIF_F_TSO6 | + NETIF_F_IP_CSUM | + NETIF_F_IPV6_CSUM | + NETIF_F_SG; + +#endif + if (pci_using_dac) + netdev->features |= NETIF_F_HIGHDMA; + +#ifdef NO_KNI + adapter->en_mng_pt = e1000_enable_mng_pass_thru(hw); +#ifdef DEBUG + if (adapter->dmac != IGB_DMAC_DISABLE) + printk("%s: DMA Coalescing is enabled..\n", netdev->name); +#endif + + /* before reading the NVM, reset the controller to put the device in a + * known good starting state */ + e1000_reset_hw(hw); +#endif /* NO_KNI */ + + /* make sure the NVM is good */ + if (e1000_validate_nvm_checksum(hw) < 0) { + dev_err(pci_dev_to_dev(pdev), "The NVM Checksum Is Not" + " Valid\n"); + err = -EIO; + goto err_eeprom; + } + + /* copy the MAC address out of the NVM */ + if (e1000_read_mac_addr(hw)) + dev_err(pci_dev_to_dev(pdev), "NVM Read Error\n"); + memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len); +#ifdef ETHTOOL_GPERMADDR + memcpy(netdev->perm_addr, hw->mac.addr, netdev->addr_len); + + if (!is_valid_ether_addr(netdev->perm_addr)) { +#else + if (!is_valid_ether_addr(netdev->dev_addr)) { +#endif + dev_err(pci_dev_to_dev(pdev), "Invalid MAC Address\n"); + err = -EIO; + goto err_eeprom; + } + + memcpy(&adapter->mac_table[0].addr, hw->mac.addr, netdev->addr_len); + adapter->mac_table[0].queue = adapter->vfs_allocated_count; + adapter->mac_table[0].state = (IGB_MAC_STATE_DEFAULT | IGB_MAC_STATE_IN_USE); + igb_rar_set(adapter, 0); + + /* get firmware version for ethtool -i */ + igb_set_fw_version(adapter); + + /* Check if Media Autosense is enabled */ + if (hw->mac.type == e1000_82580) + igb_init_mas(adapter); + +#ifdef NO_KNI + setup_timer(&adapter->watchdog_timer, &igb_watchdog, + (unsigned long) adapter); + if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA) + setup_timer(&adapter->dma_err_timer, &igb_dma_err_timer, + (unsigned long) adapter); + setup_timer(&adapter->phy_info_timer, &igb_update_phy_info, + (unsigned long) adapter); + + INIT_WORK(&adapter->reset_task, igb_reset_task); + INIT_WORK(&adapter->watchdog_task, igb_watchdog_task); + if (adapter->flags & IGB_FLAG_DETECT_BAD_DMA) + INIT_WORK(&adapter->dma_err_task, igb_dma_err_task); +#endif + + /* Initialize link properties that are user-changeable */ + adapter->fc_autoneg = true; + hw->mac.autoneg = true; + hw->phy.autoneg_advertised = 0x2f; + + hw->fc.requested_mode = e1000_fc_default; + hw->fc.current_mode = e1000_fc_default; + + e1000_validate_mdi_setting(hw); + + /* By default, support wake on port A */ + if (hw->bus.func == 0) + adapter->flags |= IGB_FLAG_WOL_SUPPORTED; + + /* Check the NVM for wake support for non-port A ports */ + if (hw->mac.type >= e1000_82580) + hw->nvm.ops.read(hw, NVM_INIT_CONTROL3_PORT_A + + NVM_82580_LAN_FUNC_OFFSET(hw->bus.func), 1, + &eeprom_data); + else if (hw->bus.func == 1) + e1000_read_nvm(hw, NVM_INIT_CONTROL3_PORT_B, 1, &eeprom_data); + + if (eeprom_data & IGB_EEPROM_APME) + adapter->flags |= IGB_FLAG_WOL_SUPPORTED; + + /* now that we have the eeprom settings, apply the special cases where + * the eeprom may be wrong or the board simply won't support wake on + * lan on a particular port */ + switch (pdev->device) { + case E1000_DEV_ID_82575GB_QUAD_COPPER: + adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED; + break; + case E1000_DEV_ID_82575EB_FIBER_SERDES: + case E1000_DEV_ID_82576_FIBER: + case E1000_DEV_ID_82576_SERDES: + /* Wake events only supported on port A for dual fiber + * regardless of eeprom setting */ + if (E1000_READ_REG(hw, E1000_STATUS) & E1000_STATUS_FUNC_1) + adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED; + break; + case E1000_DEV_ID_82576_QUAD_COPPER: + case E1000_DEV_ID_82576_QUAD_COPPER_ET2: + /* if quad port adapter, disable WoL on all but port A */ + if (global_quad_port_a != 0) + adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED; + else + adapter->flags |= IGB_FLAG_QUAD_PORT_A; + /* Reset for multiple quad port adapters */ + if (++global_quad_port_a == 4) + global_quad_port_a = 0; + break; + default: + /* If the device can't wake, don't set software support */ + if (!device_can_wakeup(&adapter->pdev->dev)) + adapter->flags &= ~IGB_FLAG_WOL_SUPPORTED; + break; + } + + /* initialize the wol settings based on the eeprom settings */ + if (adapter->flags & IGB_FLAG_WOL_SUPPORTED) + adapter->wol |= E1000_WUFC_MAG; + + /* Some vendors want WoL disabled by default, but still supported */ + if ((hw->mac.type == e1000_i350) && + (pdev->subsystem_vendor == PCI_VENDOR_ID_HP)) { + adapter->flags |= IGB_FLAG_WOL_SUPPORTED; + adapter->wol = 0; + } + +#ifdef NO_KNI + device_set_wakeup_enable(pci_dev_to_dev(adapter->pdev), + adapter->flags & IGB_FLAG_WOL_SUPPORTED); + + /* reset the hardware with the new settings */ + igb_reset(adapter); + adapter->devrc = 0; + +#ifdef HAVE_I2C_SUPPORT + /* Init the I2C interface */ + err = igb_init_i2c(adapter); + if (err) { + dev_err(&pdev->dev, "failed to init i2c interface\n"); + goto err_eeprom; + } +#endif /* HAVE_I2C_SUPPORT */ + + /* let the f/w know that the h/w is now under the control of the + * driver. */ + igb_get_hw_control(adapter); + + strncpy(netdev->name, "eth%d", IFNAMSIZ); + err = register_netdev(netdev); + if (err) + goto err_register; + +#ifdef CONFIG_IGB_VMDQ_NETDEV + err = igb_init_vmdq_netdevs(adapter); + if (err) + goto err_register; +#endif + /* carrier off reporting is important to ethtool even BEFORE open */ + netif_carrier_off(netdev); + +#ifdef IGB_DCA + if (dca_add_requester(&pdev->dev) == E1000_SUCCESS) { + adapter->flags |= IGB_FLAG_DCA_ENABLED; + dev_info(pci_dev_to_dev(pdev), "DCA enabled\n"); + igb_setup_dca(adapter); + } + +#endif +#ifdef HAVE_PTP_1588_CLOCK + /* do hw tstamp init after resetting */ + igb_ptp_init(adapter); +#endif /* HAVE_PTP_1588_CLOCK */ + +#endif /* NO_KNI */ + dev_info(pci_dev_to_dev(pdev), "Intel(R) Gigabit Ethernet Network Connection\n"); + /* print bus type/speed/width info */ + dev_info(pci_dev_to_dev(pdev), "%s: (PCIe:%s:%s) ", + netdev->name, + ((hw->bus.speed == e1000_bus_speed_2500) ? "2.5GT/s" : + (hw->bus.speed == e1000_bus_speed_5000) ? "5.0GT/s" : + (hw->mac.type == e1000_i354) ? "integrated" : + "unknown"), + ((hw->bus.width == e1000_bus_width_pcie_x4) ? "Width x4" : + (hw->bus.width == e1000_bus_width_pcie_x2) ? "Width x2" : + (hw->bus.width == e1000_bus_width_pcie_x1) ? "Width x1" : + (hw->mac.type == e1000_i354) ? "integrated" : + "unknown")); + dev_info(pci_dev_to_dev(pdev), "%s: MAC: ", netdev->name); + for (i = 0; i < 6; i++) + printk("%2.2x%c", netdev->dev_addr[i], i == 5 ? '\n' : ':'); + + ret_val = e1000_read_pba_string(hw, pba_str, E1000_PBANUM_LENGTH); + if (ret_val) + strncpy(pba_str, "Unknown", sizeof(pba_str) - 1); + dev_info(pci_dev_to_dev(pdev), "%s: PBA No: %s\n", netdev->name, + pba_str); + + + /* Initialize the thermal sensor on i350 devices. */ + if (hw->mac.type == e1000_i350) { + if (hw->bus.func == 0) { + u16 ets_word; + + /* + * Read the NVM to determine if this i350 device + * supports an external thermal sensor. + */ + e1000_read_nvm(hw, NVM_ETS_CFG, 1, &ets_word); + if (ets_word != 0x0000 && ets_word != 0xFFFF) + adapter->ets = true; + else + adapter->ets = false; + } +#ifdef NO_KNI +#ifdef IGB_HWMON + + igb_sysfs_init(adapter); +#else +#ifdef IGB_PROCFS + + igb_procfs_init(adapter); +#endif /* IGB_PROCFS */ +#endif /* IGB_HWMON */ +#endif /* NO_KNI */ + } else { + adapter->ets = false; + } + + if (hw->phy.media_type == e1000_media_type_copper) { + switch (hw->mac.type) { + case e1000_i350: + case e1000_i210: + case e1000_i211: + /* Enable EEE for internal copper PHY devices */ + err = e1000_set_eee_i350(hw); + if ((!err) && + (adapter->flags & IGB_FLAG_EEE)) + adapter->eee_advert = + MDIO_EEE_100TX | MDIO_EEE_1000T; + break; + case e1000_i354: + if ((E1000_READ_REG(hw, E1000_CTRL_EXT)) & + (E1000_CTRL_EXT_LINK_MODE_SGMII)) { + err = e1000_set_eee_i354(hw); + if ((!err) && + (adapter->flags & IGB_FLAG_EEE)) + adapter->eee_advert = + MDIO_EEE_100TX | MDIO_EEE_1000T; + } + break; + default: + break; + } + } + + /* send driver version info to firmware */ + if (hw->mac.type >= e1000_i350) + igb_init_fw(adapter); + +#ifndef IGB_NO_LRO + if (netdev->features & NETIF_F_LRO) + dev_info(pci_dev_to_dev(pdev), "Internal LRO is enabled \n"); + else + dev_info(pci_dev_to_dev(pdev), "LRO is disabled \n"); +#endif + dev_info(pci_dev_to_dev(pdev), + "Using %s interrupts. %d rx queue(s), %d tx queue(s)\n", + adapter->msix_entries ? "MSI-X" : + (adapter->flags & IGB_FLAG_HAS_MSI) ? "MSI" : "legacy", + adapter->num_rx_queues, adapter->num_tx_queues); + + cards_found++; + *lad_dev = netdev; + + pm_runtime_put_noidle(&pdev->dev); + return 0; + +//err_register: +// igb_release_hw_control(adapter); +#ifdef HAVE_I2C_SUPPORT + memset(&adapter->i2c_adap, 0, sizeof(adapter->i2c_adap)); +#endif /* HAVE_I2C_SUPPORT */ +err_eeprom: +// if (!e1000_check_reset_block(hw)) +// e1000_phy_hw_reset(hw); + + if (hw->flash_address) + iounmap(hw->flash_address); +err_sw_init: +// igb_clear_interrupt_scheme(adapter); +// igb_reset_sriov_capability(adapter); + iounmap(hw->hw_addr); +err_ioremap: + free_netdev(netdev); +err_alloc_etherdev: +// pci_release_selected_regions(pdev, +// pci_select_bars(pdev, IORESOURCE_MEM)); +//err_pci_reg: +//err_dma: + pci_disable_device(pdev); + return err; +} + + +void igb_kni_remove(struct pci_dev *pdev) +{ + pci_disable_device(pdev); +} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_param.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_param.c new file mode 100644 index 00000000..f79ce7c1 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_param.c @@ -0,0 +1,847 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + + +#include + +#include "igb.h" + +/* This is the only thing that needs to be changed to adjust the + * maximum number of ports that the driver can manage. + */ + +#define IGB_MAX_NIC 32 + +#define OPTION_UNSET -1 +#define OPTION_DISABLED 0 +#define OPTION_ENABLED 1 +#define MAX_NUM_LIST_OPTS 15 + +/* All parameters are treated the same, as an integer array of values. + * This macro just reduces the need to repeat the same declaration code + * over and over (plus this helps to avoid typo bugs). + */ + +#define IGB_PARAM_INIT { [0 ... IGB_MAX_NIC] = OPTION_UNSET } +#ifndef module_param_array +/* Module Parameters are always initialized to -1, so that the driver + * can tell the difference between no user specified value or the + * user asking for the default value. + * The true default values are loaded in when igb_check_options is called. + * + * This is a GCC extension to ANSI C. + * See the item "Labeled Elements in Initializers" in the section + * "Extensions to the C Language Family" of the GCC documentation. + */ + +#define IGB_PARAM(X, desc) \ + static const int X[IGB_MAX_NIC+1] = IGB_PARAM_INIT; \ + MODULE_PARM(X, "1-" __MODULE_STRING(IGB_MAX_NIC) "i"); \ + MODULE_PARM_DESC(X, desc); +#else +#define IGB_PARAM(X, desc) \ + static int X[IGB_MAX_NIC+1] = IGB_PARAM_INIT; \ + static unsigned int num_##X; \ + module_param_array_named(X, X, int, &num_##X, 0); \ + MODULE_PARM_DESC(X, desc); +#endif + +/* Interrupt Throttle Rate (interrupts/sec) + * + * Valid Range: 100-100000 (0=off, 1=dynamic, 3=dynamic conservative) + */ +IGB_PARAM(InterruptThrottleRate, + "Maximum interrupts per second, per vector, (max 100000), default 3=adaptive"); +#define DEFAULT_ITR 3 +#define MAX_ITR 100000 +/* #define MIN_ITR 120 */ +#define MIN_ITR 0 +/* IntMode (Interrupt Mode) + * + * Valid Range: 0 - 2 + * + * Default Value: 2 (MSI-X) + */ +IGB_PARAM(IntMode, "Change Interrupt Mode (0=Legacy, 1=MSI, 2=MSI-X), default 2"); +#define MAX_INTMODE IGB_INT_MODE_MSIX +#define MIN_INTMODE IGB_INT_MODE_LEGACY + +IGB_PARAM(Node, "set the starting node to allocate memory on, default -1"); + +/* LLIPort (Low Latency Interrupt TCP Port) + * + * Valid Range: 0 - 65535 + * + * Default Value: 0 (disabled) + */ +IGB_PARAM(LLIPort, "Low Latency Interrupt TCP Port (0-65535), default 0=off"); + +#define DEFAULT_LLIPORT 0 +#define MAX_LLIPORT 0xFFFF +#define MIN_LLIPORT 0 + +/* LLIPush (Low Latency Interrupt on TCP Push flag) + * + * Valid Range: 0, 1 + * + * Default Value: 0 (disabled) + */ +IGB_PARAM(LLIPush, "Low Latency Interrupt on TCP Push flag (0,1), default 0=off"); + +#define DEFAULT_LLIPUSH 0 +#define MAX_LLIPUSH 1 +#define MIN_LLIPUSH 0 + +/* LLISize (Low Latency Interrupt on Packet Size) + * + * Valid Range: 0 - 1500 + * + * Default Value: 0 (disabled) + */ +IGB_PARAM(LLISize, "Low Latency Interrupt on Packet Size (0-1500), default 0=off"); + +#define DEFAULT_LLISIZE 0 +#define MAX_LLISIZE 1500 +#define MIN_LLISIZE 0 + +/* RSS (Enable RSS multiqueue receive) + * + * Valid Range: 0 - 8 + * + * Default Value: 1 + */ +IGB_PARAM(RSS, "Number of Receive-Side Scaling Descriptor Queues (0-8), default 1, 0=number of cpus"); + +#define DEFAULT_RSS 1 +#define MAX_RSS 8 +#define MIN_RSS 0 + +/* VMDQ (Enable VMDq multiqueue receive) + * + * Valid Range: 0 - 8 + * + * Default Value: 0 + */ +IGB_PARAM(VMDQ, "Number of Virtual Machine Device Queues: 0-1 = disable, 2-8 enable, default 0"); + +#define DEFAULT_VMDQ 0 +#define MAX_VMDQ MAX_RSS +#define MIN_VMDQ 0 + +/* max_vfs (Enable SR-IOV VF devices) + * + * Valid Range: 0 - 7 + * + * Default Value: 0 + */ +IGB_PARAM(max_vfs, "Number of Virtual Functions: 0 = disable, 1-7 enable, default 0"); + +#define DEFAULT_SRIOV 0 +#define MAX_SRIOV 7 +#define MIN_SRIOV 0 + +/* MDD (Enable Malicious Driver Detection) + * + * Only available when SR-IOV is enabled - max_vfs is greater than 0 + * + * Valid Range: 0, 1 + * + * Default Value: 1 + */ +IGB_PARAM(MDD, "Malicious Driver Detection (0/1), default 1 = enabled. " + "Only available when max_vfs is greater than 0"); + +#ifdef DEBUG + +/* Disable Hardware Reset on Tx Hang + * + * Valid Range: 0, 1 + * + * Default Value: 0 (disabled, i.e. h/w will reset) + */ +IGB_PARAM(DisableHwReset, "Disable reset of hardware on Tx hang"); + +/* Dump Transmit and Receive buffers + * + * Valid Range: 0, 1 + * + * Default Value: 0 + */ +IGB_PARAM(DumpBuffers, "Dump Tx/Rx buffers on Tx hang or by request"); + +#endif /* DEBUG */ + +/* QueuePairs (Enable TX/RX queue pairs for interrupt handling) + * + * Valid Range: 0 - 1 + * + * Default Value: 1 + */ +IGB_PARAM(QueuePairs, "Enable Tx/Rx queue pairs for interrupt handling (0,1), default 1=on"); + +#define DEFAULT_QUEUE_PAIRS 1 +#define MAX_QUEUE_PAIRS 1 +#define MIN_QUEUE_PAIRS 0 + +/* Enable/disable EEE (a.k.a. IEEE802.3az) + * + * Valid Range: 0, 1 + * + * Default Value: 1 + */ + IGB_PARAM(EEE, "Enable/disable on parts that support the feature"); + +/* Enable/disable DMA Coalescing + * + * Valid Values: 0(off), 1000, 2000, 3000, 4000, 5000, 6000, 7000, 8000, + * 9000, 10000(msec), 250(usec), 500(usec) + * + * Default Value: 0 + */ + IGB_PARAM(DMAC, "Disable or set latency for DMA Coalescing ((0=off, 1000-10000(msec), 250, 500 (usec))"); + +#ifndef IGB_NO_LRO +/* Enable/disable Large Receive Offload + * + * Valid Values: 0(off), 1(on) + * + * Default Value: 0 + */ + IGB_PARAM(LRO, "Large Receive Offload (0,1), default 0=off"); + +#endif +struct igb_opt_list { + int i; + char *str; +}; +struct igb_option { + enum { enable_option, range_option, list_option } type; + const char *name; + const char *err; + int def; + union { + struct { /* range_option info */ + int min; + int max; + } r; + struct { /* list_option info */ + int nr; + struct igb_opt_list *p; + } l; + } arg; +}; + +static int igb_validate_option(unsigned int *value, + struct igb_option *opt, + struct igb_adapter *adapter) +{ + if (*value == OPTION_UNSET) { + *value = opt->def; + return 0; + } + + switch (opt->type) { + case enable_option: + switch (*value) { + case OPTION_ENABLED: + DPRINTK(PROBE, INFO, "%s Enabled\n", opt->name); + return 0; + case OPTION_DISABLED: + DPRINTK(PROBE, INFO, "%s Disabled\n", opt->name); + return 0; + } + break; + case range_option: + if (*value >= opt->arg.r.min && *value <= opt->arg.r.max) { + DPRINTK(PROBE, INFO, + "%s set to %d\n", opt->name, *value); + return 0; + } + break; + case list_option: { + int i; + struct igb_opt_list *ent; + + for (i = 0; i < opt->arg.l.nr; i++) { + ent = &opt->arg.l.p[i]; + if (*value == ent->i) { + if (ent->str[0] != '\0') + DPRINTK(PROBE, INFO, "%s\n", ent->str); + return 0; + } + } + } + break; + default: + BUG(); + } + + DPRINTK(PROBE, INFO, "Invalid %s value specified (%d) %s\n", + opt->name, *value, opt->err); + *value = opt->def; + return -1; +} + +/** + * igb_check_options - Range Checking for Command Line Parameters + * @adapter: board private structure + * + * This routine checks all command line parameters for valid user + * input. If an invalid value is given, or if no user specified + * value exists, a default value is used. The final value is stored + * in a variable in the adapter structure. + **/ + +void igb_check_options(struct igb_adapter *adapter) +{ + int bd = adapter->bd_number; + struct e1000_hw *hw = &adapter->hw; + + if (bd >= IGB_MAX_NIC) { + DPRINTK(PROBE, NOTICE, + "Warning: no configuration for board #%d\n", bd); + DPRINTK(PROBE, NOTICE, "Using defaults for all values\n"); +#ifndef module_param_array + bd = IGB_MAX_NIC; +#endif + } + + { /* Interrupt Throttling Rate */ + struct igb_option opt = { + .type = range_option, + .name = "Interrupt Throttling Rate (ints/sec)", + .err = "using default of " __MODULE_STRING(DEFAULT_ITR), + .def = DEFAULT_ITR, + .arg = { .r = { .min = MIN_ITR, + .max = MAX_ITR } } + }; + +#ifdef module_param_array + if (num_InterruptThrottleRate > bd) { +#endif + unsigned int itr = InterruptThrottleRate[bd]; + + switch (itr) { + case 0: + DPRINTK(PROBE, INFO, "%s turned off\n", + opt.name); + if (hw->mac.type >= e1000_i350) + adapter->dmac = IGB_DMAC_DISABLE; + adapter->rx_itr_setting = itr; + break; + case 1: + DPRINTK(PROBE, INFO, "%s set to dynamic mode\n", + opt.name); + adapter->rx_itr_setting = itr; + break; + case 3: + DPRINTK(PROBE, INFO, + "%s set to dynamic conservative mode\n", + opt.name); + adapter->rx_itr_setting = itr; + break; + default: + igb_validate_option(&itr, &opt, adapter); + /* Save the setting, because the dynamic bits + * change itr. In case of invalid user value, + * default to conservative mode, else need to + * clear the lower two bits because they are + * used as control */ + if (itr == 3) { + adapter->rx_itr_setting = itr; + } else { + adapter->rx_itr_setting = 1000000000 / + (itr * 256); + adapter->rx_itr_setting &= ~3; + } + break; + } +#ifdef module_param_array + } else { + adapter->rx_itr_setting = opt.def; + } +#endif + adapter->tx_itr_setting = adapter->rx_itr_setting; + } + { /* Interrupt Mode */ + struct igb_option opt = { + .type = range_option, + .name = "Interrupt Mode", + .err = "defaulting to 2 (MSI-X)", + .def = IGB_INT_MODE_MSIX, + .arg = { .r = { .min = MIN_INTMODE, + .max = MAX_INTMODE } } + }; + +#ifdef module_param_array + if (num_IntMode > bd) { +#endif + unsigned int int_mode = IntMode[bd]; + igb_validate_option(&int_mode, &opt, adapter); + adapter->int_mode = int_mode; +#ifdef module_param_array + } else { + adapter->int_mode = opt.def; + } +#endif + } + { /* Low Latency Interrupt TCP Port */ + struct igb_option opt = { + .type = range_option, + .name = "Low Latency Interrupt TCP Port", + .err = "using default of " __MODULE_STRING(DEFAULT_LLIPORT), + .def = DEFAULT_LLIPORT, + .arg = { .r = { .min = MIN_LLIPORT, + .max = MAX_LLIPORT } } + }; + +#ifdef module_param_array + if (num_LLIPort > bd) { +#endif + adapter->lli_port = LLIPort[bd]; + if (adapter->lli_port) { + igb_validate_option(&adapter->lli_port, &opt, + adapter); + } else { + DPRINTK(PROBE, INFO, "%s turned off\n", + opt.name); + } +#ifdef module_param_array + } else { + adapter->lli_port = opt.def; + } +#endif + } + { /* Low Latency Interrupt on Packet Size */ + struct igb_option opt = { + .type = range_option, + .name = "Low Latency Interrupt on Packet Size", + .err = "using default of " __MODULE_STRING(DEFAULT_LLISIZE), + .def = DEFAULT_LLISIZE, + .arg = { .r = { .min = MIN_LLISIZE, + .max = MAX_LLISIZE } } + }; + +#ifdef module_param_array + if (num_LLISize > bd) { +#endif + adapter->lli_size = LLISize[bd]; + if (adapter->lli_size) { + igb_validate_option(&adapter->lli_size, &opt, + adapter); + } else { + DPRINTK(PROBE, INFO, "%s turned off\n", + opt.name); + } +#ifdef module_param_array + } else { + adapter->lli_size = opt.def; + } +#endif + } + { /* Low Latency Interrupt on TCP Push flag */ + struct igb_option opt = { + .type = enable_option, + .name = "Low Latency Interrupt on TCP Push flag", + .err = "defaulting to Disabled", + .def = OPTION_DISABLED + }; + +#ifdef module_param_array + if (num_LLIPush > bd) { +#endif + unsigned int lli_push = LLIPush[bd]; + igb_validate_option(&lli_push, &opt, adapter); + adapter->flags |= lli_push ? IGB_FLAG_LLI_PUSH : 0; +#ifdef module_param_array + } else { + adapter->flags |= opt.def ? IGB_FLAG_LLI_PUSH : 0; + } +#endif + } + { /* SRIOV - Enable SR-IOV VF devices */ + struct igb_option opt = { + .type = range_option, + .name = "max_vfs - SR-IOV VF devices", + .err = "using default of " __MODULE_STRING(DEFAULT_SRIOV), + .def = DEFAULT_SRIOV, + .arg = { .r = { .min = MIN_SRIOV, + .max = MAX_SRIOV } } + }; + +#ifdef module_param_array + if (num_max_vfs > bd) { +#endif + adapter->vfs_allocated_count = max_vfs[bd]; + igb_validate_option(&adapter->vfs_allocated_count, &opt, adapter); + +#ifdef module_param_array + } else { + adapter->vfs_allocated_count = opt.def; + } +#endif + if (adapter->vfs_allocated_count) { + switch (hw->mac.type) { + case e1000_82575: + case e1000_82580: + case e1000_i210: + case e1000_i211: + case e1000_i354: + adapter->vfs_allocated_count = 0; + DPRINTK(PROBE, INFO, "SR-IOV option max_vfs not supported.\n"); + default: + break; + } + } + } + { /* VMDQ - Enable VMDq multiqueue receive */ + struct igb_option opt = { + .type = range_option, + .name = "VMDQ - VMDq multiqueue queue count", + .err = "using default of " __MODULE_STRING(DEFAULT_VMDQ), + .def = DEFAULT_VMDQ, + .arg = { .r = { .min = MIN_VMDQ, + .max = (MAX_VMDQ - adapter->vfs_allocated_count) } } + }; + if ((hw->mac.type != e1000_i210) || + (hw->mac.type != e1000_i211)) { +#ifdef module_param_array + if (num_VMDQ > bd) { +#endif + adapter->vmdq_pools = (VMDQ[bd] == 1 ? 0 : VMDQ[bd]); + if (adapter->vfs_allocated_count && !adapter->vmdq_pools) { + DPRINTK(PROBE, INFO, "Enabling SR-IOV requires VMDq be set to at least 1\n"); + adapter->vmdq_pools = 1; + } + igb_validate_option(&adapter->vmdq_pools, &opt, adapter); + +#ifdef module_param_array + } else { + if (!adapter->vfs_allocated_count) + adapter->vmdq_pools = (opt.def == 1 ? 0 : opt.def); + else + adapter->vmdq_pools = 1; + } +#endif +#ifdef CONFIG_IGB_VMDQ_NETDEV + if (hw->mac.type == e1000_82575 && adapter->vmdq_pools) { + DPRINTK(PROBE, INFO, "VMDq not supported on this part.\n"); + adapter->vmdq_pools = 0; + } +#endif + + } else { + DPRINTK(PROBE, INFO, "VMDq option is not supported.\n"); + adapter->vmdq_pools = opt.def; + } + } + { /* RSS - Enable RSS multiqueue receives */ + struct igb_option opt = { + .type = range_option, + .name = "RSS - RSS multiqueue receive count", + .err = "using default of " __MODULE_STRING(DEFAULT_RSS), + .def = DEFAULT_RSS, + .arg = { .r = { .min = MIN_RSS, + .max = MAX_RSS } } + }; + + switch (hw->mac.type) { + case e1000_82575: +#ifndef CONFIG_IGB_VMDQ_NETDEV + if (!!adapter->vmdq_pools) { + if (adapter->vmdq_pools <= 2) { + if (adapter->vmdq_pools == 2) + opt.arg.r.max = 3; + } else { + opt.arg.r.max = 1; + } + } else { + opt.arg.r.max = 4; + } +#else + opt.arg.r.max = !!adapter->vmdq_pools ? 1 : 4; +#endif /* CONFIG_IGB_VMDQ_NETDEV */ + break; + case e1000_i210: + opt.arg.r.max = 4; + break; + case e1000_i211: + opt.arg.r.max = 2; + break; + case e1000_82576: +#ifndef CONFIG_IGB_VMDQ_NETDEV + if (!!adapter->vmdq_pools) + opt.arg.r.max = 2; + break; +#endif /* CONFIG_IGB_VMDQ_NETDEV */ + case e1000_82580: + case e1000_i350: + case e1000_i354: + default: + if (!!adapter->vmdq_pools) + opt.arg.r.max = 1; + break; + } + + if (adapter->int_mode != IGB_INT_MODE_MSIX) { + DPRINTK(PROBE, INFO, "RSS is not supported when in MSI/Legacy Interrupt mode, %s\n", + opt.err); + opt.arg.r.max = 1; + } + +#ifdef module_param_array + if (num_RSS > bd) { +#endif + adapter->rss_queues = RSS[bd]; + switch (adapter->rss_queues) { + case 1: + break; + default: + igb_validate_option(&adapter->rss_queues, &opt, adapter); + if (adapter->rss_queues) + break; + case 0: + adapter->rss_queues = min_t(u32, opt.arg.r.max, num_online_cpus()); + break; + } +#ifdef module_param_array + } else { + adapter->rss_queues = opt.def; + } +#endif + } + { /* QueuePairs - Enable Tx/Rx queue pairs for interrupt handling */ + struct igb_option opt = { + .type = enable_option, + .name = "QueuePairs - Tx/Rx queue pairs for interrupt handling", + .err = "defaulting to Enabled", + .def = OPTION_ENABLED + }; +#ifdef module_param_array + if (num_QueuePairs > bd) { +#endif + unsigned int qp = QueuePairs[bd]; + /* + * We must enable queue pairs if the number of queues + * exceeds the number of available interrupts. We are + * limited to 10, or 3 per unallocated vf. On I210 and + * I211 devices, we are limited to 5 interrupts. + * However, since I211 only supports 2 queues, we do not + * need to check and override the user option. + */ + if (qp == OPTION_DISABLED) { + if (adapter->rss_queues > 4) + qp = OPTION_ENABLED; + + if (adapter->vmdq_pools > 4) + qp = OPTION_ENABLED; + + if (adapter->rss_queues > 1 && + (adapter->vmdq_pools > 3 || + adapter->vfs_allocated_count > 6)) + qp = OPTION_ENABLED; + + if (hw->mac.type == e1000_i210 && + adapter->rss_queues > 2) + qp = OPTION_ENABLED; + + if (qp == OPTION_ENABLED) + DPRINTK(PROBE, INFO, "Number of queues exceeds available interrupts, %s\n", + opt.err); + } + igb_validate_option(&qp, &opt, adapter); + adapter->flags |= qp ? IGB_FLAG_QUEUE_PAIRS : 0; +#ifdef module_param_array + } else { + adapter->flags |= opt.def ? IGB_FLAG_QUEUE_PAIRS : 0; + } +#endif + } + { /* EEE - Enable EEE for capable adapters */ + + if (hw->mac.type >= e1000_i350) { + struct igb_option opt = { + .type = enable_option, + .name = "EEE Support", + .err = "defaulting to Enabled", + .def = OPTION_ENABLED + }; +#ifdef module_param_array + if (num_EEE > bd) { +#endif + unsigned int eee = EEE[bd]; + igb_validate_option(&eee, &opt, adapter); + adapter->flags |= eee ? IGB_FLAG_EEE : 0; + if (eee) + hw->dev_spec._82575.eee_disable = false; + else + hw->dev_spec._82575.eee_disable = true; + +#ifdef module_param_array + } else { + adapter->flags |= opt.def ? IGB_FLAG_EEE : 0; + if (adapter->flags & IGB_FLAG_EEE) + hw->dev_spec._82575.eee_disable = false; + else + hw->dev_spec._82575.eee_disable = true; + } +#endif + } + } + { /* DMAC - Enable DMA Coalescing for capable adapters */ + + if (hw->mac.type >= e1000_i350) { + struct igb_opt_list list [] = { + { IGB_DMAC_DISABLE, "DMAC Disable"}, + { IGB_DMAC_MIN, "DMAC 250 usec"}, + { IGB_DMAC_500, "DMAC 500 usec"}, + { IGB_DMAC_EN_DEFAULT, "DMAC 1000 usec"}, + { IGB_DMAC_2000, "DMAC 2000 usec"}, + { IGB_DMAC_3000, "DMAC 3000 usec"}, + { IGB_DMAC_4000, "DMAC 4000 usec"}, + { IGB_DMAC_5000, "DMAC 5000 usec"}, + { IGB_DMAC_6000, "DMAC 6000 usec"}, + { IGB_DMAC_7000, "DMAC 7000 usec"}, + { IGB_DMAC_8000, "DMAC 8000 usec"}, + { IGB_DMAC_9000, "DMAC 9000 usec"}, + { IGB_DMAC_MAX, "DMAC 10000 usec"} + }; + struct igb_option opt = { + .type = list_option, + .name = "DMA Coalescing", + .err = "using default of "__MODULE_STRING(IGB_DMAC_DISABLE), + .def = IGB_DMAC_DISABLE, + .arg = { .l = { .nr = 13, + .p = list + } + } + }; +#ifdef module_param_array + if (num_DMAC > bd) { +#endif + unsigned int dmac = DMAC[bd]; + if (adapter->rx_itr_setting == IGB_DMAC_DISABLE) + dmac = IGB_DMAC_DISABLE; + igb_validate_option(&dmac, &opt, adapter); + switch (dmac) { + case IGB_DMAC_DISABLE: + adapter->dmac = dmac; + break; + case IGB_DMAC_MIN: + adapter->dmac = dmac; + break; + case IGB_DMAC_500: + adapter->dmac = dmac; + break; + case IGB_DMAC_EN_DEFAULT: + adapter->dmac = dmac; + break; + case IGB_DMAC_2000: + adapter->dmac = dmac; + break; + case IGB_DMAC_3000: + adapter->dmac = dmac; + break; + case IGB_DMAC_4000: + adapter->dmac = dmac; + break; + case IGB_DMAC_5000: + adapter->dmac = dmac; + break; + case IGB_DMAC_6000: + adapter->dmac = dmac; + break; + case IGB_DMAC_7000: + adapter->dmac = dmac; + break; + case IGB_DMAC_8000: + adapter->dmac = dmac; + break; + case IGB_DMAC_9000: + adapter->dmac = dmac; + break; + case IGB_DMAC_MAX: + adapter->dmac = dmac; + break; + default: + adapter->dmac = opt.def; + DPRINTK(PROBE, INFO, + "Invalid DMAC setting, " + "resetting DMAC to %d\n", opt.def); + } +#ifdef module_param_array + } else + adapter->dmac = opt.def; +#endif + } + } +#ifndef IGB_NO_LRO + { /* LRO - Enable Large Receive Offload */ + struct igb_option opt = { + .type = enable_option, + .name = "LRO - Large Receive Offload", + .err = "defaulting to Disabled", + .def = OPTION_DISABLED + }; + struct net_device *netdev = adapter->netdev; +#ifdef module_param_array + if (num_LRO > bd) { +#endif + unsigned int lro = LRO[bd]; + igb_validate_option(&lro, &opt, adapter); + netdev->features |= lro ? NETIF_F_LRO : 0; +#ifdef module_param_array + } else if (opt.def == OPTION_ENABLED) { + netdev->features |= NETIF_F_LRO; + } +#endif + } +#endif /* IGB_NO_LRO */ + { /* MDD - Enable Malicious Driver Detection. Only available when + SR-IOV is enabled. */ + struct igb_option opt = { + .type = enable_option, + .name = "Malicious Driver Detection", + .err = "defaulting to 1", + .def = OPTION_ENABLED, + .arg = { .r = { .min = OPTION_DISABLED, + .max = OPTION_ENABLED } } + }; + +#ifdef module_param_array + if (num_MDD > bd) { +#endif + adapter->mdd = MDD[bd]; + igb_validate_option((uint *)&adapter->mdd, &opt, + adapter); +#ifdef module_param_array + } else { + adapter->mdd = opt.def; + } +#endif + } +} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_procfs.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_procfs.c new file mode 100644 index 00000000..66236d29 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_procfs.c @@ -0,0 +1,363 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#include "igb.h" +#include "e1000_82575.h" +#include "e1000_hw.h" + +#ifdef IGB_PROCFS +#ifndef IGB_HWMON + +#include +#include +#include +#include +#include + +static struct proc_dir_entry *igb_top_dir = NULL; + + +bool igb_thermal_present(struct igb_adapter *adapter) +{ + s32 status; + struct e1000_hw *hw; + + if (adapter == NULL) + return false; + hw = &adapter->hw; + + /* + * Only set I2C bit-bang mode if an external thermal sensor is + * supported on this device. + */ + if (adapter->ets) { + status = e1000_set_i2c_bb(hw); + if (status != E1000_SUCCESS) + return false; + } + + status = hw->mac.ops.init_thermal_sensor_thresh(hw); + if (status != E1000_SUCCESS) + return false; + + return true; +} + + +static int igb_macburn(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct e1000_hw *hw; + struct igb_adapter *adapter = (struct igb_adapter *)data; + if (adapter == NULL) + return snprintf(page, count, "error: no adapter\n"); + + hw = &adapter->hw; + if (hw == NULL) + return snprintf(page, count, "error: no hw data\n"); + + return snprintf(page, count, "0x%02X%02X%02X%02X%02X%02X\n", + (unsigned int)hw->mac.perm_addr[0], + (unsigned int)hw->mac.perm_addr[1], + (unsigned int)hw->mac.perm_addr[2], + (unsigned int)hw->mac.perm_addr[3], + (unsigned int)hw->mac.perm_addr[4], + (unsigned int)hw->mac.perm_addr[5]); +} + +static int igb_macadmn(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct e1000_hw *hw; + struct igb_adapter *adapter = (struct igb_adapter *)data; + if (adapter == NULL) + return snprintf(page, count, "error: no adapter\n"); + + hw = &adapter->hw; + if (hw == NULL) + return snprintf(page, count, "error: no hw data\n"); + + return snprintf(page, count, "0x%02X%02X%02X%02X%02X%02X\n", + (unsigned int)hw->mac.addr[0], + (unsigned int)hw->mac.addr[1], + (unsigned int)hw->mac.addr[2], + (unsigned int)hw->mac.addr[3], + (unsigned int)hw->mac.addr[4], + (unsigned int)hw->mac.addr[5]); +} + +static int igb_numeports(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct e1000_hw *hw; + int ports; + struct igb_adapter *adapter = (struct igb_adapter *)data; + if (adapter == NULL) + return snprintf(page, count, "error: no adapter\n"); + + hw = &adapter->hw; + if (hw == NULL) + return snprintf(page, count, "error: no hw data\n"); + + ports = 4; + + return snprintf(page, count, "%d\n", ports); +} + +static int igb_porttype(char *page, char **start, off_t off, int count, + int *eof, void *data) +{ + struct igb_adapter *adapter = (struct igb_adapter *)data; + if (adapter == NULL) + return snprintf(page, count, "error: no adapter\n"); + + return snprintf(page, count, "%d\n", + test_bit(__IGB_DOWN, &adapter->state)); +} + +static int igb_therm_location(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct igb_therm_proc_data *therm_data = + (struct igb_therm_proc_data *)data; + + if (therm_data == NULL) + return snprintf(page, count, "error: no therm_data\n"); + + return snprintf(page, count, "%d\n", therm_data->sensor_data->location); +} + +static int igb_therm_maxopthresh(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct igb_therm_proc_data *therm_data = + (struct igb_therm_proc_data *)data; + + if (therm_data == NULL) + return snprintf(page, count, "error: no therm_data\n"); + + return snprintf(page, count, "%d\n", + therm_data->sensor_data->max_op_thresh); +} + +static int igb_therm_cautionthresh(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct igb_therm_proc_data *therm_data = + (struct igb_therm_proc_data *)data; + + if (therm_data == NULL) + return snprintf(page, count, "error: no therm_data\n"); + + return snprintf(page, count, "%d\n", + therm_data->sensor_data->caution_thresh); +} + +static int igb_therm_temp(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + s32 status; + struct igb_therm_proc_data *therm_data = + (struct igb_therm_proc_data *)data; + + if (therm_data == NULL) + return snprintf(page, count, "error: no therm_data\n"); + + status = e1000_get_thermal_sensor_data(therm_data->hw); + if (status != E1000_SUCCESS) + snprintf(page, count, "error: status %d returned\n", status); + + return snprintf(page, count, "%d\n", therm_data->sensor_data->temp); +} + +struct igb_proc_type{ + char name[32]; + int (*read)(char*, char**, off_t, int, int*, void*); +}; + +struct igb_proc_type igb_proc_entries[] = { + {"numeports", &igb_numeports}, + {"porttype", &igb_porttype}, + {"macburn", &igb_macburn}, + {"macadmn", &igb_macadmn}, + {"", NULL} +}; + +struct igb_proc_type igb_internal_entries[] = { + {"location", &igb_therm_location}, + {"temp", &igb_therm_temp}, + {"cautionthresh", &igb_therm_cautionthresh}, + {"maxopthresh", &igb_therm_maxopthresh}, + {"", NULL} +}; + +void igb_del_proc_entries(struct igb_adapter *adapter) +{ + int index, i; + char buf[16]; /* much larger than the sensor number will ever be */ + + if (igb_top_dir == NULL) + return; + + for (i = 0; i < E1000_MAX_SENSORS; i++) { + if (adapter->therm_dir[i] == NULL) + continue; + + for (index = 0; ; index++) { + if (igb_internal_entries[index].read == NULL) + break; + + remove_proc_entry(igb_internal_entries[index].name, + adapter->therm_dir[i]); + } + snprintf(buf, sizeof(buf), "sensor_%d", i); + remove_proc_entry(buf, adapter->info_dir); + } + + if (adapter->info_dir != NULL) { + for (index = 0; ; index++) { + if (igb_proc_entries[index].read == NULL) + break; + remove_proc_entry(igb_proc_entries[index].name, + adapter->info_dir); + } + remove_proc_entry("info", adapter->eth_dir); + } + + if (adapter->eth_dir != NULL) + remove_proc_entry(pci_name(adapter->pdev), igb_top_dir); +} + +/* called from igb_main.c */ +void igb_procfs_exit(struct igb_adapter *adapter) +{ + igb_del_proc_entries(adapter); +} + +int igb_procfs_topdir_init(void) +{ + igb_top_dir = proc_mkdir("driver/igb", NULL); + if (igb_top_dir == NULL) + return -ENOMEM; + + return 0; +} + +void igb_procfs_topdir_exit(void) +{ + remove_proc_entry("driver/igb", NULL); +} + +/* called from igb_main.c */ +int igb_procfs_init(struct igb_adapter *adapter) +{ + int rc = 0; + int i; + int index; + char buf[16]; /* much larger than the sensor number will ever be */ + + adapter->eth_dir = NULL; + adapter->info_dir = NULL; + for (i = 0; i < E1000_MAX_SENSORS; i++) + adapter->therm_dir[i] = NULL; + + if ( igb_top_dir == NULL ) { + rc = -ENOMEM; + goto fail; + } + + adapter->eth_dir = proc_mkdir(pci_name(adapter->pdev), igb_top_dir); + if (adapter->eth_dir == NULL) { + rc = -ENOMEM; + goto fail; + } + + adapter->info_dir = proc_mkdir("info", adapter->eth_dir); + if (adapter->info_dir == NULL) { + rc = -ENOMEM; + goto fail; + } + for (index = 0; ; index++) { + if (igb_proc_entries[index].read == NULL) { + break; + } + if (!(create_proc_read_entry(igb_proc_entries[index].name, + 0444, + adapter->info_dir, + igb_proc_entries[index].read, + adapter))) { + + rc = -ENOMEM; + goto fail; + } + } + if (igb_thermal_present(adapter) == false) + goto exit; + + for (i = 0; i < E1000_MAX_SENSORS; i++) { + + if (adapter->hw.mac.thermal_sensor_data.sensor[i].location== 0) + continue; + + snprintf(buf, sizeof(buf), "sensor_%d", i); + adapter->therm_dir[i] = proc_mkdir(buf, adapter->info_dir); + if (adapter->therm_dir[i] == NULL) { + rc = -ENOMEM; + goto fail; + } + for (index = 0; ; index++) { + if (igb_internal_entries[index].read == NULL) + break; + /* + * therm_data struct contains pointer the read func + * will be needing + */ + adapter->therm_data[i].hw = &adapter->hw; + adapter->therm_data[i].sensor_data = + &adapter->hw.mac.thermal_sensor_data.sensor[i]; + + if (!(create_proc_read_entry( + igb_internal_entries[index].name, + 0444, + adapter->therm_dir[i], + igb_internal_entries[index].read, + &adapter->therm_data[i]))) { + rc = -ENOMEM; + goto fail; + } + } + } + goto exit; + +fail: + igb_del_proc_entries(adapter); +exit: + return rc; +} + +#endif /* !IGB_HWMON */ +#endif /* IGB_PROCFS */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ptp.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ptp.c new file mode 100644 index 00000000..454b70ce --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_ptp.c @@ -0,0 +1,944 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +/****************************************************************************** + Copyright(c) 2011 Richard Cochran for some of the + 82576 and 82580 code +******************************************************************************/ + +#include "igb.h" + +#include +#include +#include +#include + +#define INCVALUE_MASK 0x7fffffff +#define ISGN 0x80000000 + +/* + * The 82580 timesync updates the system timer every 8ns by 8ns, + * and this update value cannot be reprogrammed. + * + * Neither the 82576 nor the 82580 offer registers wide enough to hold + * nanoseconds time values for very long. For the 82580, SYSTIM always + * counts nanoseconds, but the upper 24 bits are not available. The + * frequency is adjusted by changing the 32 bit fractional nanoseconds + * register, TIMINCA. + * + * For the 82576, the SYSTIM register time unit is affect by the + * choice of the 24 bit TININCA:IV (incvalue) field. Five bits of this + * field are needed to provide the nominal 16 nanosecond period, + * leaving 19 bits for fractional nanoseconds. + * + * We scale the NIC clock cycle by a large factor so that relatively + * small clock corrections can be added or subtracted at each clock + * tick. The drawbacks of a large factor are a) that the clock + * register overflows more quickly (not such a big deal) and b) that + * the increment per tick has to fit into 24 bits. As a result we + * need to use a shift of 19 so we can fit a value of 16 into the + * TIMINCA register. + * + * + * SYSTIMH SYSTIML + * +--------------+ +---+---+------+ + * 82576 | 32 | | 8 | 5 | 19 | + * +--------------+ +---+---+------+ + * \________ 45 bits _______/ fract + * + * +----------+---+ +--------------+ + * 82580 | 24 | 8 | | 32 | + * +----------+---+ +--------------+ + * reserved \______ 40 bits _____/ + * + * + * The 45 bit 82576 SYSTIM overflows every + * 2^45 * 10^-9 / 3600 = 9.77 hours. + * + * The 40 bit 82580 SYSTIM overflows every + * 2^40 * 10^-9 / 60 = 18.3 minutes. + */ + +#define IGB_SYSTIM_OVERFLOW_PERIOD (HZ * 60 * 9) +#define IGB_PTP_TX_TIMEOUT (HZ * 15) +#define INCPERIOD_82576 (1 << E1000_TIMINCA_16NS_SHIFT) +#define INCVALUE_82576_MASK ((1 << E1000_TIMINCA_16NS_SHIFT) - 1) +#define INCVALUE_82576 (16 << IGB_82576_TSYNC_SHIFT) +#define IGB_NBITS_82580 40 + +/* + * SYSTIM read access for the 82576 + */ + +static cycle_t igb_ptp_read_82576(const struct cyclecounter *cc) +{ + struct igb_adapter *igb = container_of(cc, struct igb_adapter, cc); + struct e1000_hw *hw = &igb->hw; + u64 val; + u32 lo, hi; + + lo = E1000_READ_REG(hw, E1000_SYSTIML); + hi = E1000_READ_REG(hw, E1000_SYSTIMH); + + val = ((u64) hi) << 32; + val |= lo; + + return val; +} + +/* + * SYSTIM read access for the 82580 + */ + +static cycle_t igb_ptp_read_82580(const struct cyclecounter *cc) +{ + struct igb_adapter *igb = container_of(cc, struct igb_adapter, cc); + struct e1000_hw *hw = &igb->hw; + u64 val; + u32 lo, hi; + + /* The timestamp latches on lowest register read. For the 82580 + * the lowest register is SYSTIMR instead of SYSTIML. However we only + * need to provide nanosecond resolution, so we just ignore it. + */ + E1000_READ_REG(hw, E1000_SYSTIMR); + lo = E1000_READ_REG(hw, E1000_SYSTIML); + hi = E1000_READ_REG(hw, E1000_SYSTIMH); + + val = ((u64) hi) << 32; + val |= lo; + + return val; +} + +/* + * SYSTIM read access for I210/I211 + */ + +static void igb_ptp_read_i210(struct igb_adapter *adapter, struct timespec *ts) +{ + struct e1000_hw *hw = &adapter->hw; + u32 sec, nsec; + + /* The timestamp latches on lowest register read. For I210/I211, the + * lowest register is SYSTIMR. Since we only need to provide nanosecond + * resolution, we can ignore it. + */ + E1000_READ_REG(hw, E1000_SYSTIMR); + nsec = E1000_READ_REG(hw, E1000_SYSTIML); + sec = E1000_READ_REG(hw, E1000_SYSTIMH); + + ts->tv_sec = sec; + ts->tv_nsec = nsec; +} + +static void igb_ptp_write_i210(struct igb_adapter *adapter, + const struct timespec *ts) +{ + struct e1000_hw *hw = &adapter->hw; + + /* + * Writing the SYSTIMR register is not necessary as it only provides + * sub-nanosecond resolution. + */ + E1000_WRITE_REG(hw, E1000_SYSTIML, ts->tv_nsec); + E1000_WRITE_REG(hw, E1000_SYSTIMH, ts->tv_sec); +} + +/** + * igb_ptp_systim_to_hwtstamp - convert system time value to hw timestamp + * @adapter: board private structure + * @hwtstamps: timestamp structure to update + * @systim: unsigned 64bit system time value. + * + * We need to convert the system time value stored in the RX/TXSTMP registers + * into a hwtstamp which can be used by the upper level timestamping functions. + * + * The 'tmreg_lock' spinlock is used to protect the consistency of the + * system time value. This is needed because reading the 64 bit time + * value involves reading two (or three) 32 bit registers. The first + * read latches the value. Ditto for writing. + * + * In addition, here have extended the system time with an overflow + * counter in software. + **/ +static void igb_ptp_systim_to_hwtstamp(struct igb_adapter *adapter, + struct skb_shared_hwtstamps *hwtstamps, + u64 systim) +{ + unsigned long flags; + u64 ns; + + switch (adapter->hw.mac.type) { + case e1000_82576: + case e1000_82580: + case e1000_i350: + case e1000_i354: + spin_lock_irqsave(&adapter->tmreg_lock, flags); + + ns = timecounter_cyc2time(&adapter->tc, systim); + + spin_unlock_irqrestore(&adapter->tmreg_lock, flags); + + memset(hwtstamps, 0, sizeof(*hwtstamps)); + hwtstamps->hwtstamp = ns_to_ktime(ns); + break; + case e1000_i210: + case e1000_i211: + memset(hwtstamps, 0, sizeof(*hwtstamps)); + /* Upper 32 bits contain s, lower 32 bits contain ns. */ + hwtstamps->hwtstamp = ktime_set(systim >> 32, + systim & 0xFFFFFFFF); + break; + default: + break; + } +} + +/* + * PTP clock operations + */ + +static int igb_ptp_adjfreq_82576(struct ptp_clock_info *ptp, s32 ppb) +{ + struct igb_adapter *igb = container_of(ptp, struct igb_adapter, + ptp_caps); + struct e1000_hw *hw = &igb->hw; + int neg_adj = 0; + u64 rate; + u32 incvalue; + + if (ppb < 0) { + neg_adj = 1; + ppb = -ppb; + } + rate = ppb; + rate <<= 14; + rate = div_u64(rate, 1953125); + + incvalue = 16 << IGB_82576_TSYNC_SHIFT; + + if (neg_adj) + incvalue -= rate; + else + incvalue += rate; + + E1000_WRITE_REG(hw, E1000_TIMINCA, INCPERIOD_82576 | (incvalue & INCVALUE_82576_MASK)); + + return 0; +} + +static int igb_ptp_adjfreq_82580(struct ptp_clock_info *ptp, s32 ppb) +{ + struct igb_adapter *igb = container_of(ptp, struct igb_adapter, + ptp_caps); + struct e1000_hw *hw = &igb->hw; + int neg_adj = 0; + u64 rate; + u32 inca; + + if (ppb < 0) { + neg_adj = 1; + ppb = -ppb; + } + rate = ppb; + rate <<= 26; + rate = div_u64(rate, 1953125); + + /* At 2.5G speeds, the TIMINCA register on I354 updates the clock 2.5x + * as quickly. Account for this by dividing the adjustment by 2.5. + */ + if (hw->mac.type == e1000_i354) { + u32 status = E1000_READ_REG(hw, E1000_STATUS); + + if ((status & E1000_STATUS_2P5_SKU) && + !(status & E1000_STATUS_2P5_SKU_OVER)) { + rate <<= 1; + rate = div_u64(rate, 5); + } + } + + inca = rate & INCVALUE_MASK; + if (neg_adj) + inca |= ISGN; + + E1000_WRITE_REG(hw, E1000_TIMINCA, inca); + + return 0; +} + +static int igb_ptp_adjtime_82576(struct ptp_clock_info *ptp, s64 delta) +{ + struct igb_adapter *igb = container_of(ptp, struct igb_adapter, + ptp_caps); + unsigned long flags; + s64 now; + + spin_lock_irqsave(&igb->tmreg_lock, flags); + + now = timecounter_read(&igb->tc); + now += delta; + timecounter_init(&igb->tc, &igb->cc, now); + + spin_unlock_irqrestore(&igb->tmreg_lock, flags); + + return 0; +} + +static int igb_ptp_adjtime_i210(struct ptp_clock_info *ptp, s64 delta) +{ + struct igb_adapter *igb = container_of(ptp, struct igb_adapter, + ptp_caps); + unsigned long flags; + struct timespec now, then = ns_to_timespec(delta); + + spin_lock_irqsave(&igb->tmreg_lock, flags); + + igb_ptp_read_i210(igb, &now); + now = timespec_add(now, then); + igb_ptp_write_i210(igb, (const struct timespec *)&now); + + spin_unlock_irqrestore(&igb->tmreg_lock, flags); + + return 0; +} + +static int igb_ptp_gettime_82576(struct ptp_clock_info *ptp, + struct timespec *ts) +{ + struct igb_adapter *igb = container_of(ptp, struct igb_adapter, + ptp_caps); + unsigned long flags; + u64 ns; + u32 remainder; + + spin_lock_irqsave(&igb->tmreg_lock, flags); + + ns = timecounter_read(&igb->tc); + + spin_unlock_irqrestore(&igb->tmreg_lock, flags); + + ts->tv_sec = div_u64_rem(ns, 1000000000, &remainder); + ts->tv_nsec = remainder; + + return 0; +} + +static int igb_ptp_gettime_i210(struct ptp_clock_info *ptp, + struct timespec *ts) +{ + struct igb_adapter *igb = container_of(ptp, struct igb_adapter, + ptp_caps); + unsigned long flags; + + spin_lock_irqsave(&igb->tmreg_lock, flags); + + igb_ptp_read_i210(igb, ts); + + spin_unlock_irqrestore(&igb->tmreg_lock, flags); + + return 0; +} + +static int igb_ptp_settime_82576(struct ptp_clock_info *ptp, + const struct timespec *ts) +{ + struct igb_adapter *igb = container_of(ptp, struct igb_adapter, + ptp_caps); + unsigned long flags; + u64 ns; + + ns = ts->tv_sec * 1000000000ULL; + ns += ts->tv_nsec; + + spin_lock_irqsave(&igb->tmreg_lock, flags); + + timecounter_init(&igb->tc, &igb->cc, ns); + + spin_unlock_irqrestore(&igb->tmreg_lock, flags); + + return 0; +} + +static int igb_ptp_settime_i210(struct ptp_clock_info *ptp, + const struct timespec *ts) +{ + struct igb_adapter *igb = container_of(ptp, struct igb_adapter, + ptp_caps); + unsigned long flags; + + spin_lock_irqsave(&igb->tmreg_lock, flags); + + igb_ptp_write_i210(igb, ts); + + spin_unlock_irqrestore(&igb->tmreg_lock, flags); + + return 0; +} + +static int igb_ptp_enable(struct ptp_clock_info *ptp, + struct ptp_clock_request *rq, int on) +{ + return -EOPNOTSUPP; +} + +/** + * igb_ptp_tx_work + * @work: pointer to work struct + * + * This work function polls the TSYNCTXCTL valid bit to determine when a + * timestamp has been taken for the current stored skb. + */ +void igb_ptp_tx_work(struct work_struct *work) +{ + struct igb_adapter *adapter = container_of(work, struct igb_adapter, + ptp_tx_work); + struct e1000_hw *hw = &adapter->hw; + u32 tsynctxctl; + + if (!adapter->ptp_tx_skb) + return; + + if (time_is_before_jiffies(adapter->ptp_tx_start + + IGB_PTP_TX_TIMEOUT)) { + dev_kfree_skb_any(adapter->ptp_tx_skb); + adapter->ptp_tx_skb = NULL; + adapter->tx_hwtstamp_timeouts++; + dev_warn(&adapter->pdev->dev, "clearing Tx timestamp hang"); + return; + } + + tsynctxctl = E1000_READ_REG(hw, E1000_TSYNCTXCTL); + if (tsynctxctl & E1000_TSYNCTXCTL_VALID) + igb_ptp_tx_hwtstamp(adapter); + else + /* reschedule to check later */ + schedule_work(&adapter->ptp_tx_work); +} + +static void igb_ptp_overflow_check(struct work_struct *work) +{ + struct igb_adapter *igb = + container_of(work, struct igb_adapter, ptp_overflow_work.work); + struct timespec ts; + + igb->ptp_caps.gettime(&igb->ptp_caps, &ts); + + pr_debug("igb overflow check at %ld.%09lu\n", ts.tv_sec, ts.tv_nsec); + + schedule_delayed_work(&igb->ptp_overflow_work, + IGB_SYSTIM_OVERFLOW_PERIOD); +} + +/** + * igb_ptp_rx_hang - detect error case when Rx timestamp registers latched + * @adapter: private network adapter structure + * + * This watchdog task is scheduled to detect error case where hardware has + * dropped an Rx packet that was timestamped when the ring is full. The + * particular error is rare but leaves the device in a state unable to timestamp + * any future packets. + */ +void igb_ptp_rx_hang(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + struct igb_ring *rx_ring; + u32 tsyncrxctl = E1000_READ_REG(hw, E1000_TSYNCRXCTL); + unsigned long rx_event; + int n; + + if (hw->mac.type != e1000_82576) + return; + + /* If we don't have a valid timestamp in the registers, just update the + * timeout counter and exit + */ + if (!(tsyncrxctl & E1000_TSYNCRXCTL_VALID)) { + adapter->last_rx_ptp_check = jiffies; + return; + } + + /* Determine the most recent watchdog or rx_timestamp event */ + rx_event = adapter->last_rx_ptp_check; + for (n = 0; n < adapter->num_rx_queues; n++) { + rx_ring = adapter->rx_ring[n]; + if (time_after(rx_ring->last_rx_timestamp, rx_event)) + rx_event = rx_ring->last_rx_timestamp; + } + + /* Only need to read the high RXSTMP register to clear the lock */ + if (time_is_before_jiffies(rx_event + 5 * HZ)) { + E1000_READ_REG(hw, E1000_RXSTMPH); + adapter->last_rx_ptp_check = jiffies; + adapter->rx_hwtstamp_cleared++; + dev_warn(&adapter->pdev->dev, "clearing Rx timestamp hang"); + } +} + +/** + * igb_ptp_tx_hwtstamp - utility function which checks for TX time stamp + * @adapter: Board private structure. + * + * If we were asked to do hardware stamping and such a time stamp is + * available, then it must have been for this skb here because we only + * allow only one such packet into the queue. + */ +void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + struct skb_shared_hwtstamps shhwtstamps; + u64 regval; + + regval = E1000_READ_REG(hw, E1000_TXSTMPL); + regval |= (u64)E1000_READ_REG(hw, E1000_TXSTMPH) << 32; + + igb_ptp_systim_to_hwtstamp(adapter, &shhwtstamps, regval); + skb_tstamp_tx(adapter->ptp_tx_skb, &shhwtstamps); + dev_kfree_skb_any(adapter->ptp_tx_skb); + adapter->ptp_tx_skb = NULL; +} + +/** + * igb_ptp_rx_pktstamp - retrieve Rx per packet timestamp + * @q_vector: Pointer to interrupt specific structure + * @va: Pointer to address containing Rx buffer + * @skb: Buffer containing timestamp and packet + * + * This function is meant to retrieve a timestamp from the first buffer of an + * incoming frame. The value is stored in little endian format starting on + * byte 8. + */ +void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, + unsigned char *va, + struct sk_buff *skb) +{ + __le64 *regval = (__le64 *)va; + + /* + * The timestamp is recorded in little endian format. + * DWORD: 0 1 2 3 + * Field: Reserved Reserved SYSTIML SYSTIMH + */ + igb_ptp_systim_to_hwtstamp(q_vector->adapter, skb_hwtstamps(skb), + le64_to_cpu(regval[1])); +} + +/** + * igb_ptp_rx_rgtstamp - retrieve Rx timestamp stored in register + * @q_vector: Pointer to interrupt specific structure + * @skb: Buffer containing timestamp and packet + * + * This function is meant to retrieve a timestamp from the internal registers + * of the adapter and store it in the skb. + */ +void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector, + struct sk_buff *skb) +{ + struct igb_adapter *adapter = q_vector->adapter; + struct e1000_hw *hw = &adapter->hw; + u64 regval; + + /* + * If this bit is set, then the RX registers contain the time stamp. No + * other packet will be time stamped until we read these registers, so + * read the registers to make them available again. Because only one + * packet can be time stamped at a time, we know that the register + * values must belong to this one here and therefore we don't need to + * compare any of the additional attributes stored for it. + * + * If nothing went wrong, then it should have a shared tx_flags that we + * can turn into a skb_shared_hwtstamps. + */ + if (!(E1000_READ_REG(hw, E1000_TSYNCRXCTL) & E1000_TSYNCRXCTL_VALID)) + return; + + regval = E1000_READ_REG(hw, E1000_RXSTMPL); + regval |= (u64)E1000_READ_REG(hw, E1000_RXSTMPH) << 32; + + igb_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval); +} + +/** + * igb_ptp_hwtstamp_ioctl - control hardware time stamping + * @netdev: + * @ifreq: + * @cmd: + * + * Outgoing time stamping can be enabled and disabled. Play nice and + * disable it when requested, although it shouldn't case any overhead + * when no packet needs it. At most one packet in the queue may be + * marked for time stamping, otherwise it would be impossible to tell + * for sure to which packet the hardware time stamp belongs. + * + * Incoming time stamping has to be configured via the hardware + * filters. Not all combinations are supported, in particular event + * type has to be specified. Matching the kind of event packet is + * not supported, with the exception of "all V2 events regardless of + * level 2 or 4". + * + **/ +int igb_ptp_hwtstamp_ioctl(struct net_device *netdev, + struct ifreq *ifr, int cmd) +{ + struct igb_adapter *adapter = netdev_priv(netdev); + struct e1000_hw *hw = &adapter->hw; + struct hwtstamp_config config; + u32 tsync_tx_ctl = E1000_TSYNCTXCTL_ENABLED; + u32 tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED; + u32 tsync_rx_cfg = 0; + bool is_l4 = false; + bool is_l2 = false; + u32 regval; + + if (copy_from_user(&config, ifr->ifr_data, sizeof(config))) + return -EFAULT; + + /* reserved for future extensions */ + if (config.flags) + return -EINVAL; + + switch (config.tx_type) { + case HWTSTAMP_TX_OFF: + tsync_tx_ctl = 0; + case HWTSTAMP_TX_ON: + break; + default: + return -ERANGE; + } + + switch (config.rx_filter) { + case HWTSTAMP_FILTER_NONE: + tsync_rx_ctl = 0; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_SYNC: + tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1; + tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_SYNC_MESSAGE; + is_l4 = true; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ: + tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_L4_V1; + tsync_rx_cfg = E1000_TSYNCRXCFG_PTP_V1_DELAY_REQ_MESSAGE; + is_l4 = true; + break; + case HWTSTAMP_FILTER_PTP_V2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L2_EVENT: + case HWTSTAMP_FILTER_PTP_V2_L4_EVENT: + case HWTSTAMP_FILTER_PTP_V2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L2_SYNC: + case HWTSTAMP_FILTER_PTP_V2_L4_SYNC: + case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ: + case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ: + tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_EVENT_V2; + config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT; + is_l2 = true; + is_l4 = true; + break; + case HWTSTAMP_FILTER_PTP_V1_L4_EVENT: + case HWTSTAMP_FILTER_ALL: + /* + * 82576 cannot timestamp all packets, which it needs to do to + * support both V1 Sync and Delay_Req messages + */ + if (hw->mac.type != e1000_82576) { + tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL; + config.rx_filter = HWTSTAMP_FILTER_ALL; + break; + } + /* fall through */ + default: + config.rx_filter = HWTSTAMP_FILTER_NONE; + return -ERANGE; + } + + if (hw->mac.type == e1000_82575) { + if (tsync_rx_ctl | tsync_tx_ctl) + return -EINVAL; + return 0; + } + + /* + * Per-packet timestamping only works if all packets are + * timestamped, so enable timestamping in all packets as + * long as one rx filter was configured. + */ + if ((hw->mac.type >= e1000_82580) && tsync_rx_ctl) { + tsync_rx_ctl = E1000_TSYNCRXCTL_ENABLED; + tsync_rx_ctl |= E1000_TSYNCRXCTL_TYPE_ALL; + config.rx_filter = HWTSTAMP_FILTER_ALL; + is_l2 = true; + is_l4 = true; + + if ((hw->mac.type == e1000_i210) || + (hw->mac.type == e1000_i211)) { + regval = E1000_READ_REG(hw, E1000_RXPBS); + regval |= E1000_RXPBS_CFG_TS_EN; + E1000_WRITE_REG(hw, E1000_RXPBS, regval); + } + } + + /* enable/disable TX */ + regval = E1000_READ_REG(hw, E1000_TSYNCTXCTL); + regval &= ~E1000_TSYNCTXCTL_ENABLED; + regval |= tsync_tx_ctl; + E1000_WRITE_REG(hw, E1000_TSYNCTXCTL, regval); + + /* enable/disable RX */ + regval = E1000_READ_REG(hw, E1000_TSYNCRXCTL); + regval &= ~(E1000_TSYNCRXCTL_ENABLED | E1000_TSYNCRXCTL_TYPE_MASK); + regval |= tsync_rx_ctl; + E1000_WRITE_REG(hw, E1000_TSYNCRXCTL, regval); + + /* define which PTP packets are time stamped */ + E1000_WRITE_REG(hw, E1000_TSYNCRXCFG, tsync_rx_cfg); + + /* define ethertype filter for timestamped packets */ + if (is_l2) + E1000_WRITE_REG(hw, E1000_ETQF(3), + (E1000_ETQF_FILTER_ENABLE | /* enable filter */ + E1000_ETQF_1588 | /* enable timestamping */ + ETH_P_1588)); /* 1588 eth protocol type */ + else + E1000_WRITE_REG(hw, E1000_ETQF(3), 0); + + /* L4 Queue Filter[3]: filter by destination port and protocol */ + if (is_l4) { + u32 ftqf = (IPPROTO_UDP /* UDP */ + | E1000_FTQF_VF_BP /* VF not compared */ + | E1000_FTQF_1588_TIME_STAMP /* Enable Timestamping */ + | E1000_FTQF_MASK); /* mask all inputs */ + ftqf &= ~E1000_FTQF_MASK_PROTO_BP; /* enable protocol check */ + + E1000_WRITE_REG(hw, E1000_IMIR(3), htons(PTP_EV_PORT)); + E1000_WRITE_REG(hw, E1000_IMIREXT(3), + (E1000_IMIREXT_SIZE_BP | E1000_IMIREXT_CTRL_BP)); + if (hw->mac.type == e1000_82576) { + /* enable source port check */ + E1000_WRITE_REG(hw, E1000_SPQF(3), htons(PTP_EV_PORT)); + ftqf &= ~E1000_FTQF_MASK_SOURCE_PORT_BP; + } + E1000_WRITE_REG(hw, E1000_FTQF(3), ftqf); + } else { + E1000_WRITE_REG(hw, E1000_FTQF(3), E1000_FTQF_MASK); + } + E1000_WRITE_FLUSH(hw); + + /* clear TX/RX time stamp registers, just to be sure */ + regval = E1000_READ_REG(hw, E1000_TXSTMPL); + regval = E1000_READ_REG(hw, E1000_TXSTMPH); + regval = E1000_READ_REG(hw, E1000_RXSTMPL); + regval = E1000_READ_REG(hw, E1000_RXSTMPH); + + return copy_to_user(ifr->ifr_data, &config, sizeof(config)) ? + -EFAULT : 0; +} + +void igb_ptp_init(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + struct net_device *netdev = adapter->netdev; + + switch (hw->mac.type) { + case e1000_82576: + snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr); + adapter->ptp_caps.owner = THIS_MODULE; + adapter->ptp_caps.max_adj = 999999881; + adapter->ptp_caps.n_ext_ts = 0; + adapter->ptp_caps.pps = 0; + adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82576; + adapter->ptp_caps.adjtime = igb_ptp_adjtime_82576; + adapter->ptp_caps.gettime = igb_ptp_gettime_82576; + adapter->ptp_caps.settime = igb_ptp_settime_82576; + adapter->ptp_caps.enable = igb_ptp_enable; + adapter->cc.read = igb_ptp_read_82576; + adapter->cc.mask = CLOCKSOURCE_MASK(64); + adapter->cc.mult = 1; + adapter->cc.shift = IGB_82576_TSYNC_SHIFT; + /* Dial the nominal frequency. */ + E1000_WRITE_REG(hw, E1000_TIMINCA, INCPERIOD_82576 | + INCVALUE_82576); + break; + case e1000_82580: + case e1000_i350: + case e1000_i354: + snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr); + adapter->ptp_caps.owner = THIS_MODULE; + adapter->ptp_caps.max_adj = 62499999; + adapter->ptp_caps.n_ext_ts = 0; + adapter->ptp_caps.pps = 0; + adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82580; + adapter->ptp_caps.adjtime = igb_ptp_adjtime_82576; + adapter->ptp_caps.gettime = igb_ptp_gettime_82576; + adapter->ptp_caps.settime = igb_ptp_settime_82576; + adapter->ptp_caps.enable = igb_ptp_enable; + adapter->cc.read = igb_ptp_read_82580; + adapter->cc.mask = CLOCKSOURCE_MASK(IGB_NBITS_82580); + adapter->cc.mult = 1; + adapter->cc.shift = 0; + /* Enable the timer functions by clearing bit 31. */ + E1000_WRITE_REG(hw, E1000_TSAUXC, 0x0); + break; + case e1000_i210: + case e1000_i211: + snprintf(adapter->ptp_caps.name, 16, "%pm", netdev->dev_addr); + adapter->ptp_caps.owner = THIS_MODULE; + adapter->ptp_caps.max_adj = 62499999; + adapter->ptp_caps.n_ext_ts = 0; + adapter->ptp_caps.pps = 0; + adapter->ptp_caps.adjfreq = igb_ptp_adjfreq_82580; + adapter->ptp_caps.adjtime = igb_ptp_adjtime_i210; + adapter->ptp_caps.gettime = igb_ptp_gettime_i210; + adapter->ptp_caps.settime = igb_ptp_settime_i210; + adapter->ptp_caps.enable = igb_ptp_enable; + /* Enable the timer functions by clearing bit 31. */ + E1000_WRITE_REG(hw, E1000_TSAUXC, 0x0); + break; + default: + adapter->ptp_clock = NULL; + return; + } + + E1000_WRITE_FLUSH(hw); + + spin_lock_init(&adapter->tmreg_lock); + INIT_WORK(&adapter->ptp_tx_work, igb_ptp_tx_work); + + /* Initialize the clock and overflow work for devices that need it. */ + if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) { + struct timespec ts = ktime_to_timespec(ktime_get_real()); + + igb_ptp_settime_i210(&adapter->ptp_caps, &ts); + } else { + timecounter_init(&adapter->tc, &adapter->cc, + ktime_to_ns(ktime_get_real())); + + INIT_DELAYED_WORK(&adapter->ptp_overflow_work, + igb_ptp_overflow_check); + + schedule_delayed_work(&adapter->ptp_overflow_work, + IGB_SYSTIM_OVERFLOW_PERIOD); + } + + /* Initialize the time sync interrupts for devices that support it. */ + if (hw->mac.type >= e1000_82580) { + E1000_WRITE_REG(hw, E1000_TSIM, E1000_TSIM_TXTS); + E1000_WRITE_REG(hw, E1000_IMS, E1000_IMS_TS); + } + + adapter->ptp_clock = ptp_clock_register(&adapter->ptp_caps, + &adapter->pdev->dev); + if (IS_ERR(adapter->ptp_clock)) { + adapter->ptp_clock = NULL; + dev_err(&adapter->pdev->dev, "ptp_clock_register failed\n"); + } else { + dev_info(&adapter->pdev->dev, "added PHC on %s\n", + adapter->netdev->name); + adapter->flags |= IGB_FLAG_PTP; + } +} + +/** + * igb_ptp_stop - Disable PTP device and stop the overflow check. + * @adapter: Board private structure. + * + * This function stops the PTP support and cancels the delayed work. + **/ +void igb_ptp_stop(struct igb_adapter *adapter) +{ + switch (adapter->hw.mac.type) { + case e1000_82576: + case e1000_82580: + case e1000_i350: + case e1000_i354: + cancel_delayed_work_sync(&adapter->ptp_overflow_work); + break; + case e1000_i210: + case e1000_i211: + /* No delayed work to cancel. */ + break; + default: + return; + } + + cancel_work_sync(&adapter->ptp_tx_work); + if (adapter->ptp_tx_skb) { + dev_kfree_skb_any(adapter->ptp_tx_skb); + adapter->ptp_tx_skb = NULL; + } + + if (adapter->ptp_clock) { + ptp_clock_unregister(adapter->ptp_clock); + dev_info(&adapter->pdev->dev, "removed PHC on %s\n", + adapter->netdev->name); + adapter->flags &= ~IGB_FLAG_PTP; + } +} + +/** + * igb_ptp_reset - Re-enable the adapter for PTP following a reset. + * @adapter: Board private structure. + * + * This function handles the reset work required to re-enable the PTP device. + **/ +void igb_ptp_reset(struct igb_adapter *adapter) +{ + struct e1000_hw *hw = &adapter->hw; + + if (!(adapter->flags & IGB_FLAG_PTP)) + return; + + switch (adapter->hw.mac.type) { + case e1000_82576: + /* Dial the nominal frequency. */ + E1000_WRITE_REG(hw, E1000_TIMINCA, INCPERIOD_82576 | + INCVALUE_82576); + break; + case e1000_82580: + case e1000_i350: + case e1000_i354: + case e1000_i210: + case e1000_i211: + /* Enable the timer functions and interrupts. */ + E1000_WRITE_REG(hw, E1000_TSAUXC, 0x0); + E1000_WRITE_REG(hw, E1000_TSIM, E1000_TSIM_TXTS); + E1000_WRITE_REG(hw, E1000_IMS, E1000_IMS_TS); + break; + default: + /* No work to do. */ + return; + } + + /* Re-initialize the timer. */ + if ((hw->mac.type == e1000_i210) || (hw->mac.type == e1000_i211)) { + struct timespec ts = ktime_to_timespec(ktime_get_real()); + + igb_ptp_settime_i210(&adapter->ptp_caps, &ts); + } else { + timecounter_init(&adapter->tc, &adapter->cc, + ktime_to_ns(ktime_get_real())); + } +} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_regtest.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_regtest.h new file mode 100644 index 00000000..18da64a3 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_regtest.h @@ -0,0 +1,249 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +/* ethtool register test data */ +struct igb_reg_test { + u16 reg; + u16 reg_offset; + u16 array_len; + u16 test_type; + u32 mask; + u32 write; +}; + +/* In the hardware, registers are laid out either singly, in arrays + * spaced 0x100 bytes apart, or in contiguous tables. We assume + * most tests take place on arrays or single registers (handled + * as a single-element array) and special-case the tables. + * Table tests are always pattern tests. + * + * We also make provision for some required setup steps by specifying + * registers to be written without any read-back testing. + */ + +#define PATTERN_TEST 1 +#define SET_READ_TEST 2 +#define WRITE_NO_TEST 3 +#define TABLE32_TEST 4 +#define TABLE64_TEST_LO 5 +#define TABLE64_TEST_HI 6 + +/* i210 reg test */ +static struct igb_reg_test reg_test_i210[] = { + { E1000_FCAL, 0x100, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_FCAH, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF }, + { E1000_FCT, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF }, + { E1000_RDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, + { E1000_RDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_RDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF }, + /* RDH is read-only for i210, only test RDT. */ + { E1000_RDT(0), 0x100, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, + { E1000_FCRTH, 0x100, 1, PATTERN_TEST, 0x0003FFF0, 0x0003FFF0 }, + { E1000_FCTTV, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, + { E1000_TIPG, 0x100, 1, PATTERN_TEST, 0x3FFFFFFF, 0x3FFFFFFF }, + { E1000_TDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, + { E1000_TDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_TDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF }, + { E1000_TDT(0), 0x100, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, + { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 }, + { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0x003FFFFB }, + { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0xFFFFFFFF }, + { E1000_TCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 }, + { E1000_RA, 0, 16, TABLE64_TEST_LO, + 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_RA, 0, 16, TABLE64_TEST_HI, + 0x900FFFFF, 0xFFFFFFFF }, + { E1000_MTA, 0, 128, TABLE32_TEST, + 0xFFFFFFFF, 0xFFFFFFFF }, + { 0, 0, 0, 0 } +}; + +/* i350 reg test */ +static struct igb_reg_test reg_test_i350[] = { + { E1000_FCAL, 0x100, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_FCAH, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF }, + { E1000_FCT, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF }, + /* VET is readonly on i350 */ + { E1000_RDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, + { E1000_RDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_RDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF }, + { E1000_RDBAL(4), 0x40, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, + { E1000_RDBAH(4), 0x40, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_RDLEN(4), 0x40, 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF }, + /* RDH is read-only for i350, only test RDT. */ + { E1000_RDT(0), 0x100, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, + { E1000_RDT(4), 0x40, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, + { E1000_FCRTH, 0x100, 1, PATTERN_TEST, 0x0000FFF0, 0x0000FFF0 }, + { E1000_FCTTV, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, + { E1000_TIPG, 0x100, 1, PATTERN_TEST, 0x3FFFFFFF, 0x3FFFFFFF }, + { E1000_TDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, + { E1000_TDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_TDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF }, + { E1000_TDBAL(4), 0x40, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, + { E1000_TDBAH(4), 0x40, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_TDLEN(4), 0x40, 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF }, + { E1000_TDT(0), 0x100, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, + { E1000_TDT(4), 0x40, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, + { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 }, + { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0x003FFFFB }, + { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0xFFFFFFFF }, + { E1000_TCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 }, + { E1000_RA, 0, 16, TABLE64_TEST_LO, + 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_RA, 0, 16, TABLE64_TEST_HI, + 0xC3FFFFFF, 0xFFFFFFFF }, + { E1000_RA2, 0, 16, TABLE64_TEST_LO, + 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_RA2, 0, 16, TABLE64_TEST_HI, + 0xC3FFFFFF, 0xFFFFFFFF }, + { E1000_MTA, 0, 128, TABLE32_TEST, + 0xFFFFFFFF, 0xFFFFFFFF }, + { 0, 0, 0, 0 } +}; + +/* 82580 reg test */ +static struct igb_reg_test reg_test_82580[] = { + { E1000_FCAL, 0x100, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_FCAH, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF }, + { E1000_FCT, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF }, + { E1000_VET, 0x100, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_RDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, + { E1000_RDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_RDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFFF0, 0x000FFFFF }, + { E1000_RDBAL(4), 0x40, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, + { E1000_RDBAH(4), 0x40, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_RDLEN(4), 0x40, 4, PATTERN_TEST, 0x000FFFF0, 0x000FFFFF }, + /* RDH is read-only for 82580, only test RDT. */ + { E1000_RDT(0), 0x100, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, + { E1000_RDT(4), 0x40, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, + { E1000_FCRTH, 0x100, 1, PATTERN_TEST, 0x0000FFF0, 0x0000FFF0 }, + { E1000_FCTTV, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, + { E1000_TIPG, 0x100, 1, PATTERN_TEST, 0x3FFFFFFF, 0x3FFFFFFF }, + { E1000_TDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, + { E1000_TDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_TDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFFF0, 0x000FFFFF }, + { E1000_TDBAL(4), 0x40, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, + { E1000_TDBAH(4), 0x40, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_TDLEN(4), 0x40, 4, PATTERN_TEST, 0x000FFFF0, 0x000FFFFF }, + { E1000_TDT(0), 0x100, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, + { E1000_TDT(4), 0x40, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, + { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 }, + { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0x003FFFFB }, + { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0xFFFFFFFF }, + { E1000_TCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 }, + { E1000_RA, 0, 16, TABLE64_TEST_LO, + 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_RA, 0, 16, TABLE64_TEST_HI, + 0x83FFFFFF, 0xFFFFFFFF }, + { E1000_RA2, 0, 8, TABLE64_TEST_LO, + 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_RA2, 0, 8, TABLE64_TEST_HI, + 0x83FFFFFF, 0xFFFFFFFF }, + { E1000_MTA, 0, 128, TABLE32_TEST, + 0xFFFFFFFF, 0xFFFFFFFF }, + { 0, 0, 0, 0 } +}; + +/* 82576 reg test */ +static struct igb_reg_test reg_test_82576[] = { + { E1000_FCAL, 0x100, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_FCAH, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF }, + { E1000_FCT, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF }, + { E1000_VET, 0x100, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_RDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, + { E1000_RDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_RDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFFF0, 0x000FFFFF }, + { E1000_RDBAL(4), 0x40, 12, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, + { E1000_RDBAH(4), 0x40, 12, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_RDLEN(4), 0x40, 12, PATTERN_TEST, 0x000FFFF0, 0x000FFFFF }, + /* Enable all queues before testing. */ + { E1000_RXDCTL(0), 0x100, 4, WRITE_NO_TEST, 0, E1000_RXDCTL_QUEUE_ENABLE }, + { E1000_RXDCTL(4), 0x40, 12, WRITE_NO_TEST, 0, E1000_RXDCTL_QUEUE_ENABLE }, + /* RDH is read-only for 82576, only test RDT. */ + { E1000_RDT(0), 0x100, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, + { E1000_RDT(4), 0x40, 12, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, + { E1000_RXDCTL(0), 0x100, 4, WRITE_NO_TEST, 0, 0 }, + { E1000_RXDCTL(4), 0x40, 12, WRITE_NO_TEST, 0, 0 }, + { E1000_FCRTH, 0x100, 1, PATTERN_TEST, 0x0000FFF0, 0x0000FFF0 }, + { E1000_FCTTV, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, + { E1000_TIPG, 0x100, 1, PATTERN_TEST, 0x3FFFFFFF, 0x3FFFFFFF }, + { E1000_TDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, + { E1000_TDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_TDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFFF0, 0x000FFFFF }, + { E1000_TDBAL(4), 0x40, 12, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, + { E1000_TDBAH(4), 0x40, 12, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_TDLEN(4), 0x40, 12, PATTERN_TEST, 0x000FFFF0, 0x000FFFFF }, + { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 }, + { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0x003FFFFB }, + { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB0FE, 0xFFFFFFFF }, + { E1000_TCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 }, + { E1000_RA, 0, 16, TABLE64_TEST_LO, + 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_RA, 0, 16, TABLE64_TEST_HI, + 0x83FFFFFF, 0xFFFFFFFF }, + { E1000_RA2, 0, 8, TABLE64_TEST_LO, + 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_RA2, 0, 8, TABLE64_TEST_HI, + 0x83FFFFFF, 0xFFFFFFFF }, + { E1000_MTA, 0, 128, TABLE32_TEST, + 0xFFFFFFFF, 0xFFFFFFFF }, + { 0, 0, 0, 0 } +}; + +/* 82575 register test */ +static struct igb_reg_test reg_test_82575[] = { + { E1000_FCAL, 0x100, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_FCAH, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF }, + { E1000_FCT, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0xFFFFFFFF }, + { E1000_VET, 0x100, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_RDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, + { E1000_RDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_RDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF }, + /* Enable all four RX queues before testing. */ + { E1000_RXDCTL(0), 0x100, 4, WRITE_NO_TEST, 0, E1000_RXDCTL_QUEUE_ENABLE }, + /* RDH is read-only for 82575, only test RDT. */ + { E1000_RDT(0), 0x100, 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, + { E1000_RXDCTL(0), 0x100, 4, WRITE_NO_TEST, 0, 0 }, + { E1000_FCRTH, 0x100, 1, PATTERN_TEST, 0x0000FFF0, 0x0000FFF0 }, + { E1000_FCTTV, 0x100, 1, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, + { E1000_TIPG, 0x100, 1, PATTERN_TEST, 0x3FFFFFFF, 0x3FFFFFFF }, + { E1000_TDBAL(0), 0x100, 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, + { E1000_TDBAH(0), 0x100, 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_TDLEN(0), 0x100, 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF }, + { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 }, + { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB3FE, 0x003FFFFB }, + { E1000_RCTL, 0x100, 1, SET_READ_TEST, 0x04CFB3FE, 0xFFFFFFFF }, + { E1000_TCTL, 0x100, 1, SET_READ_TEST, 0xFFFFFFFF, 0x00000000 }, + { E1000_TXCW, 0x100, 1, PATTERN_TEST, 0xC000FFFF, 0x0000FFFF }, + { E1000_RA, 0, 16, TABLE64_TEST_LO, + 0xFFFFFFFF, 0xFFFFFFFF }, + { E1000_RA, 0, 16, TABLE64_TEST_HI, + 0x800FFFFF, 0xFFFFFFFF }, + { E1000_MTA, 0, 128, TABLE32_TEST, + 0xFFFFFFFF, 0xFFFFFFFF }, + { 0, 0, 0, 0 } +}; diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c new file mode 100644 index 00000000..015c8952 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.c @@ -0,0 +1,436 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + + +#include + +#include "igb.h" +#include "igb_vmdq.h" +#include + +#ifdef CONFIG_IGB_VMDQ_NETDEV +int igb_vmdq_open(struct net_device *dev) +{ + struct igb_vmdq_adapter *vadapter = netdev_priv(dev); + struct igb_adapter *adapter = vadapter->real_adapter; + struct net_device *main_netdev = adapter->netdev; + int hw_queue = vadapter->rx_ring->queue_index + + adapter->vfs_allocated_count; + + if (test_bit(__IGB_DOWN, &adapter->state)) { + DPRINTK(DRV, WARNING, + "Open %s before opening this device.\n", + main_netdev->name); + return -EAGAIN; + } + netif_carrier_off(dev); + vadapter->tx_ring->vmdq_netdev = dev; + vadapter->rx_ring->vmdq_netdev = dev; + if (is_valid_ether_addr(dev->dev_addr)) { + igb_del_mac_filter(adapter, dev->dev_addr, hw_queue); + igb_add_mac_filter(adapter, dev->dev_addr, hw_queue); + } + netif_carrier_on(dev); + return 0; +} + +int igb_vmdq_close(struct net_device *dev) +{ + struct igb_vmdq_adapter *vadapter = netdev_priv(dev); + struct igb_adapter *adapter = vadapter->real_adapter; + int hw_queue = vadapter->rx_ring->queue_index + + adapter->vfs_allocated_count; + + netif_carrier_off(dev); + igb_del_mac_filter(adapter, dev->dev_addr, hw_queue); + + vadapter->tx_ring->vmdq_netdev = NULL; + vadapter->rx_ring->vmdq_netdev = NULL; + return 0; +} + +netdev_tx_t igb_vmdq_xmit_frame(struct sk_buff *skb, struct net_device *dev) +{ + struct igb_vmdq_adapter *vadapter = netdev_priv(dev); + + return igb_xmit_frame_ring(skb, vadapter->tx_ring); +} + +struct net_device_stats *igb_vmdq_get_stats(struct net_device *dev) +{ + struct igb_vmdq_adapter *vadapter = netdev_priv(dev); + struct igb_adapter *adapter = vadapter->real_adapter; + struct e1000_hw *hw = &adapter->hw; + int hw_queue = vadapter->rx_ring->queue_index + + adapter->vfs_allocated_count; + + vadapter->net_stats.rx_packets += + E1000_READ_REG(hw, E1000_PFVFGPRC(hw_queue)); + E1000_WRITE_REG(hw, E1000_PFVFGPRC(hw_queue), 0); + vadapter->net_stats.tx_packets += + E1000_READ_REG(hw, E1000_PFVFGPTC(hw_queue)); + E1000_WRITE_REG(hw, E1000_PFVFGPTC(hw_queue), 0); + vadapter->net_stats.rx_bytes += + E1000_READ_REG(hw, E1000_PFVFGORC(hw_queue)); + E1000_WRITE_REG(hw, E1000_PFVFGORC(hw_queue), 0); + vadapter->net_stats.tx_bytes += + E1000_READ_REG(hw, E1000_PFVFGOTC(hw_queue)); + E1000_WRITE_REG(hw, E1000_PFVFGOTC(hw_queue), 0); + vadapter->net_stats.multicast += + E1000_READ_REG(hw, E1000_PFVFMPRC(hw_queue)); + E1000_WRITE_REG(hw, E1000_PFVFMPRC(hw_queue), 0); + /* only return the current stats */ + return &vadapter->net_stats; +} + +/** + * igb_write_vm_addr_list - write unicast addresses to RAR table + * @netdev: network interface device structure + * + * Writes unicast address list to the RAR table. + * Returns: -ENOMEM on failure/insufficient address space + * 0 on no addresses written + * X on writing X addresses to the RAR table + **/ +static int igb_write_vm_addr_list(struct net_device *netdev) +{ + struct igb_vmdq_adapter *vadapter = netdev_priv(netdev); + struct igb_adapter *adapter = vadapter->real_adapter; + int count = 0; + int hw_queue = vadapter->rx_ring->queue_index + + adapter->vfs_allocated_count; + + /* return ENOMEM indicating insufficient memory for addresses */ + if (netdev_uc_count(netdev) > igb_available_rars(adapter)) + return -ENOMEM; + + if (!netdev_uc_empty(netdev)) { +#ifdef NETDEV_HW_ADDR_T_UNICAST + struct netdev_hw_addr *ha; +#else + struct dev_mc_list *ha; +#endif + netdev_for_each_uc_addr(ha, netdev) { +#ifdef NETDEV_HW_ADDR_T_UNICAST + igb_del_mac_filter(adapter, ha->addr, hw_queue); + igb_add_mac_filter(adapter, ha->addr, hw_queue); +#else + igb_del_mac_filter(adapter, ha->da_addr, hw_queue); + igb_add_mac_filter(adapter, ha->da_addr, hw_queue); +#endif + count++; + } + } + return count; +} + + +#define E1000_VMOLR_UPE 0x20000000 /* Unicast promiscuous mode */ +void igb_vmdq_set_rx_mode(struct net_device *dev) +{ + struct igb_vmdq_adapter *vadapter = netdev_priv(dev); + struct igb_adapter *adapter = vadapter->real_adapter; + struct e1000_hw *hw = &adapter->hw; + u32 vmolr, rctl; + int hw_queue = vadapter->rx_ring->queue_index + + adapter->vfs_allocated_count; + + /* Check for Promiscuous and All Multicast modes */ + vmolr = E1000_READ_REG(hw, E1000_VMOLR(hw_queue)); + + /* clear the affected bits */ + vmolr &= ~(E1000_VMOLR_UPE | E1000_VMOLR_MPME | + E1000_VMOLR_ROPE | E1000_VMOLR_ROMPE); + + if (dev->flags & IFF_PROMISC) { + vmolr |= E1000_VMOLR_UPE; + rctl = E1000_READ_REG(hw, E1000_RCTL); + rctl |= E1000_RCTL_UPE; + E1000_WRITE_REG(hw, E1000_RCTL, rctl); + } else { + rctl = E1000_READ_REG(hw, E1000_RCTL); + rctl &= ~E1000_RCTL_UPE; + E1000_WRITE_REG(hw, E1000_RCTL, rctl); + if (dev->flags & IFF_ALLMULTI) { + vmolr |= E1000_VMOLR_MPME; + } else { + /* + * Write addresses to the MTA, if the attempt fails + * then we should just turn on promiscuous mode so + * that we can at least receive multicast traffic + */ + if (igb_write_mc_addr_list(adapter->netdev) != 0) + vmolr |= E1000_VMOLR_ROMPE; + } +#ifdef HAVE_SET_RX_MODE + /* + * Write addresses to available RAR registers, if there is not + * sufficient space to store all the addresses then enable + * unicast promiscuous mode + */ + if (igb_write_vm_addr_list(dev) < 0) + vmolr |= E1000_VMOLR_UPE; +#endif + } + E1000_WRITE_REG(hw, E1000_VMOLR(hw_queue), vmolr); + + return; +} + +int igb_vmdq_set_mac(struct net_device *dev, void *p) +{ + struct sockaddr *addr = p; + struct igb_vmdq_adapter *vadapter = netdev_priv(dev); + struct igb_adapter *adapter = vadapter->real_adapter; + int hw_queue = vadapter->rx_ring->queue_index + + adapter->vfs_allocated_count; + + igb_del_mac_filter(adapter, dev->dev_addr, hw_queue); + memcpy(dev->dev_addr, addr->sa_data, dev->addr_len); + return igb_add_mac_filter(adapter, dev->dev_addr, hw_queue); +} + +int igb_vmdq_change_mtu(struct net_device *dev, int new_mtu) +{ + struct igb_vmdq_adapter *vadapter = netdev_priv(dev); + struct igb_adapter *adapter = vadapter->real_adapter; + + if (adapter->netdev->mtu < new_mtu) { + DPRINTK(PROBE, INFO, + "Set MTU on %s to >= %d " + "before changing MTU on %s\n", + adapter->netdev->name, new_mtu, dev->name); + return -EINVAL; + } + dev->mtu = new_mtu; + return 0; +} + +void igb_vmdq_tx_timeout(struct net_device *dev) +{ + return; +} + +void igb_vmdq_vlan_rx_register(struct net_device *dev, struct vlan_group *grp) +{ + struct igb_vmdq_adapter *vadapter = netdev_priv(dev); + struct igb_adapter *adapter = vadapter->real_adapter; + struct e1000_hw *hw = &adapter->hw; + int hw_queue = vadapter->rx_ring->queue_index + + adapter->vfs_allocated_count; + + vadapter->vlgrp = grp; + + igb_enable_vlan_tags(adapter); + E1000_WRITE_REG(hw, E1000_VMVIR(hw_queue), 0); + + return; +} +void igb_vmdq_vlan_rx_add_vid(struct net_device *dev, unsigned short vid) +{ + struct igb_vmdq_adapter *vadapter = netdev_priv(dev); + struct igb_adapter *adapter = vadapter->real_adapter; +#ifndef HAVE_NETDEV_VLAN_FEATURES + struct net_device *v_netdev; +#endif + int hw_queue = vadapter->rx_ring->queue_index + + adapter->vfs_allocated_count; + + /* attempt to add filter to vlvf array */ + igb_vlvf_set(adapter, vid, TRUE, hw_queue); + +#ifndef HAVE_NETDEV_VLAN_FEATURES + + /* Copy feature flags from netdev to the vlan netdev for this vid. + * This allows things like TSO to bubble down to our vlan device. + */ + v_netdev = vlan_group_get_device(vadapter->vlgrp, vid); + v_netdev->features |= adapter->netdev->features; + vlan_group_set_device(vadapter->vlgrp, vid, v_netdev); +#endif + + return; +} +void igb_vmdq_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid) +{ + struct igb_vmdq_adapter *vadapter = netdev_priv(dev); + struct igb_adapter *adapter = vadapter->real_adapter; + int hw_queue = vadapter->rx_ring->queue_index + + adapter->vfs_allocated_count; + + vlan_group_set_device(vadapter->vlgrp, vid, NULL); + /* remove vlan from VLVF table array */ + igb_vlvf_set(adapter, vid, FALSE, hw_queue); + + + return; +} + +static int igb_vmdq_get_settings(struct net_device *netdev, + struct ethtool_cmd *ecmd) +{ + struct igb_vmdq_adapter *vadapter = netdev_priv(netdev); + struct igb_adapter *adapter = vadapter->real_adapter; + struct e1000_hw *hw = &adapter->hw; + u32 status; + + if (hw->phy.media_type == e1000_media_type_copper) { + + ecmd->supported = (SUPPORTED_10baseT_Half | + SUPPORTED_10baseT_Full | + SUPPORTED_100baseT_Half | + SUPPORTED_100baseT_Full | + SUPPORTED_1000baseT_Full| + SUPPORTED_Autoneg | + SUPPORTED_TP); + ecmd->advertising = ADVERTISED_TP; + + if (hw->mac.autoneg == 1) { + ecmd->advertising |= ADVERTISED_Autoneg; + /* the e1000 autoneg seems to match ethtool nicely */ + ecmd->advertising |= hw->phy.autoneg_advertised; + } + + ecmd->port = PORT_TP; + ecmd->phy_address = hw->phy.addr; + } else { + ecmd->supported = (SUPPORTED_1000baseT_Full | + SUPPORTED_FIBRE | + SUPPORTED_Autoneg); + + ecmd->advertising = (ADVERTISED_1000baseT_Full | + ADVERTISED_FIBRE | + ADVERTISED_Autoneg); + + ecmd->port = PORT_FIBRE; + } + + ecmd->transceiver = XCVR_INTERNAL; + + status = E1000_READ_REG(hw, E1000_STATUS); + + if (status & E1000_STATUS_LU) { + + if ((status & E1000_STATUS_SPEED_1000) || + hw->phy.media_type != e1000_media_type_copper) + ecmd->speed = SPEED_1000; + else if (status & E1000_STATUS_SPEED_100) + ecmd->speed = SPEED_100; + else + ecmd->speed = SPEED_10; + + if ((status & E1000_STATUS_FD) || + hw->phy.media_type != e1000_media_type_copper) + ecmd->duplex = DUPLEX_FULL; + else + ecmd->duplex = DUPLEX_HALF; + } else { + ecmd->speed = -1; + ecmd->duplex = -1; + } + + ecmd->autoneg = hw->mac.autoneg ? AUTONEG_ENABLE : AUTONEG_DISABLE; + return 0; +} + + +static u32 igb_vmdq_get_msglevel(struct net_device *netdev) +{ + struct igb_vmdq_adapter *vadapter = netdev_priv(netdev); + struct igb_adapter *adapter = vadapter->real_adapter; + return adapter->msg_enable; +} + +static void igb_vmdq_get_drvinfo(struct net_device *netdev, + struct ethtool_drvinfo *drvinfo) +{ + struct igb_vmdq_adapter *vadapter = netdev_priv(netdev); + struct igb_adapter *adapter = vadapter->real_adapter; + struct net_device *main_netdev = adapter->netdev; + + strncpy(drvinfo->driver, igb_driver_name, 32); + strncpy(drvinfo->version, igb_driver_version, 32); + + strncpy(drvinfo->fw_version, "N/A", 4); + snprintf(drvinfo->bus_info, 32, "%s VMDQ %d", main_netdev->name, + vadapter->rx_ring->queue_index); + drvinfo->n_stats = 0; + drvinfo->testinfo_len = 0; + drvinfo->regdump_len = 0; +} + +static void igb_vmdq_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + struct igb_vmdq_adapter *vadapter = netdev_priv(netdev); + + struct igb_ring *tx_ring = vadapter->tx_ring; + struct igb_ring *rx_ring = vadapter->rx_ring; + + ring->rx_max_pending = IGB_MAX_RXD; + ring->tx_max_pending = IGB_MAX_TXD; + ring->rx_mini_max_pending = 0; + ring->rx_jumbo_max_pending = 0; + ring->rx_pending = rx_ring->count; + ring->tx_pending = tx_ring->count; + ring->rx_mini_pending = 0; + ring->rx_jumbo_pending = 0; +} +static u32 igb_vmdq_get_rx_csum(struct net_device *netdev) +{ + struct igb_vmdq_adapter *vadapter = netdev_priv(netdev); + struct igb_adapter *adapter = vadapter->real_adapter; + + return test_bit(IGB_RING_FLAG_RX_CSUM, &adapter->rx_ring[0]->flags); +} + + +static struct ethtool_ops igb_vmdq_ethtool_ops = { + .get_settings = igb_vmdq_get_settings, + .get_drvinfo = igb_vmdq_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_ringparam = igb_vmdq_get_ringparam, + .get_rx_csum = igb_vmdq_get_rx_csum, + .get_tx_csum = ethtool_op_get_tx_csum, + .get_sg = ethtool_op_get_sg, + .set_sg = ethtool_op_set_sg, + .get_msglevel = igb_vmdq_get_msglevel, +#ifdef NETIF_F_TSO + .get_tso = ethtool_op_get_tso, +#endif +#ifdef HAVE_ETHTOOL_GET_PERM_ADDR + .get_perm_addr = ethtool_op_get_perm_addr, +#endif +}; + +void igb_vmdq_set_ethtool_ops(struct net_device *netdev) +{ + SET_ETHTOOL_OPS(netdev, &igb_vmdq_ethtool_ops); +} + + +#endif /* CONFIG_IGB_VMDQ_NETDEV */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h new file mode 100644 index 00000000..e51e7c4e --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/igb_vmdq.h @@ -0,0 +1,46 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _IGB_VMDQ_H_ +#define _IGB_VMDQ_H_ + +#ifdef CONFIG_IGB_VMDQ_NETDEV +int igb_vmdq_open(struct net_device *dev); +int igb_vmdq_close(struct net_device *dev); +netdev_tx_t igb_vmdq_xmit_frame(struct sk_buff *skb, struct net_device *dev); +struct net_device_stats *igb_vmdq_get_stats(struct net_device *dev); +void igb_vmdq_set_rx_mode(struct net_device *dev); +int igb_vmdq_set_mac(struct net_device *dev, void *addr); +int igb_vmdq_change_mtu(struct net_device *dev, int new_mtu); +void igb_vmdq_tx_timeout(struct net_device *dev); +void igb_vmdq_vlan_rx_register(struct net_device *dev, + struct vlan_group *grp); +void igb_vmdq_vlan_rx_add_vid(struct net_device *dev, unsigned short vid); +void igb_vmdq_vlan_rx_kill_vid(struct net_device *dev, unsigned short vid); +void igb_vmdq_set_ethtool_ops(struct net_device *netdev); +#endif /* CONFIG_IGB_VMDQ_NETDEV */ +#endif /* _IGB_VMDQ_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.c new file mode 100644 index 00000000..bde3a83c --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.c @@ -0,0 +1,1482 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#include "igb.h" +#include "kcompat.h" + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,8) ) +/* From lib/vsprintf.c */ +#include + +static int skip_atoi(const char **s) +{ + int i=0; + + while (isdigit(**s)) + i = i*10 + *((*s)++) - '0'; + return i; +} + +#define _kc_ZEROPAD 1 /* pad with zero */ +#define _kc_SIGN 2 /* unsigned/signed long */ +#define _kc_PLUS 4 /* show plus */ +#define _kc_SPACE 8 /* space if plus */ +#define _kc_LEFT 16 /* left justified */ +#define _kc_SPECIAL 32 /* 0x */ +#define _kc_LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */ + +static char * number(char * buf, char * end, long long num, int base, int size, int precision, int type) +{ + char c,sign,tmp[66]; + const char *digits; + const char small_digits[] = "0123456789abcdefghijklmnopqrstuvwxyz"; + const char large_digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + int i; + + digits = (type & _kc_LARGE) ? large_digits : small_digits; + if (type & _kc_LEFT) + type &= ~_kc_ZEROPAD; + if (base < 2 || base > 36) + return 0; + c = (type & _kc_ZEROPAD) ? '0' : ' '; + sign = 0; + if (type & _kc_SIGN) { + if (num < 0) { + sign = '-'; + num = -num; + size--; + } else if (type & _kc_PLUS) { + sign = '+'; + size--; + } else if (type & _kc_SPACE) { + sign = ' '; + size--; + } + } + if (type & _kc_SPECIAL) { + if (base == 16) + size -= 2; + else if (base == 8) + size--; + } + i = 0; + if (num == 0) + tmp[i++]='0'; + else while (num != 0) + tmp[i++] = digits[do_div(num,base)]; + if (i > precision) + precision = i; + size -= precision; + if (!(type&(_kc_ZEROPAD+_kc_LEFT))) { + while(size-->0) { + if (buf <= end) + *buf = ' '; + ++buf; + } + } + if (sign) { + if (buf <= end) + *buf = sign; + ++buf; + } + if (type & _kc_SPECIAL) { + if (base==8) { + if (buf <= end) + *buf = '0'; + ++buf; + } else if (base==16) { + if (buf <= end) + *buf = '0'; + ++buf; + if (buf <= end) + *buf = digits[33]; + ++buf; + } + } + if (!(type & _kc_LEFT)) { + while (size-- > 0) { + if (buf <= end) + *buf = c; + ++buf; + } + } + while (i < precision--) { + if (buf <= end) + *buf = '0'; + ++buf; + } + while (i-- > 0) { + if (buf <= end) + *buf = tmp[i]; + ++buf; + } + while (size-- > 0) { + if (buf <= end) + *buf = ' '; + ++buf; + } + return buf; +} + +int _kc_vsnprintf(char *buf, size_t size, const char *fmt, va_list args) +{ + int len; + unsigned long long num; + int i, base; + char *str, *end, c; + const char *s; + + int flags; /* flags to number() */ + + int field_width; /* width of output field */ + int precision; /* min. # of digits for integers; max + number of chars for from string */ + int qualifier; /* 'h', 'l', or 'L' for integer fields */ + /* 'z' support added 23/7/1999 S.H. */ + /* 'z' changed to 'Z' --davidm 1/25/99 */ + + str = buf; + end = buf + size - 1; + + if (end < buf - 1) { + end = ((void *) -1); + size = end - buf + 1; + } + + for (; *fmt ; ++fmt) { + if (*fmt != '%') { + if (str <= end) + *str = *fmt; + ++str; + continue; + } + + /* process flags */ + flags = 0; + repeat: + ++fmt; /* this also skips first '%' */ + switch (*fmt) { + case '-': flags |= _kc_LEFT; goto repeat; + case '+': flags |= _kc_PLUS; goto repeat; + case ' ': flags |= _kc_SPACE; goto repeat; + case '#': flags |= _kc_SPECIAL; goto repeat; + case '0': flags |= _kc_ZEROPAD; goto repeat; + } + + /* get field width */ + field_width = -1; + if (isdigit(*fmt)) + field_width = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + field_width = va_arg(args, int); + if (field_width < 0) { + field_width = -field_width; + flags |= _kc_LEFT; + } + } + + /* get the precision */ + precision = -1; + if (*fmt == '.') { + ++fmt; + if (isdigit(*fmt)) + precision = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + precision = va_arg(args, int); + } + if (precision < 0) + precision = 0; + } + + /* get the conversion qualifier */ + qualifier = -1; + if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt =='Z') { + qualifier = *fmt; + ++fmt; + } + + /* default base */ + base = 10; + + switch (*fmt) { + case 'c': + if (!(flags & _kc_LEFT)) { + while (--field_width > 0) { + if (str <= end) + *str = ' '; + ++str; + } + } + c = (unsigned char) va_arg(args, int); + if (str <= end) + *str = c; + ++str; + while (--field_width > 0) { + if (str <= end) + *str = ' '; + ++str; + } + continue; + + case 's': + s = va_arg(args, char *); + if (!s) + s = ""; + + len = strnlen(s, precision); + + if (!(flags & _kc_LEFT)) { + while (len < field_width--) { + if (str <= end) + *str = ' '; + ++str; + } + } + for (i = 0; i < len; ++i) { + if (str <= end) + *str = *s; + ++str; ++s; + } + while (len < field_width--) { + if (str <= end) + *str = ' '; + ++str; + } + continue; + + case 'p': + if (field_width == -1) { + field_width = 2*sizeof(void *); + flags |= _kc_ZEROPAD; + } + str = number(str, end, + (unsigned long) va_arg(args, void *), + 16, field_width, precision, flags); + continue; + + + case 'n': + /* FIXME: + * What does C99 say about the overflow case here? */ + if (qualifier == 'l') { + long * ip = va_arg(args, long *); + *ip = (str - buf); + } else if (qualifier == 'Z') { + size_t * ip = va_arg(args, size_t *); + *ip = (str - buf); + } else { + int * ip = va_arg(args, int *); + *ip = (str - buf); + } + continue; + + case '%': + if (str <= end) + *str = '%'; + ++str; + continue; + + /* integer number formats - set up the flags and "break" */ + case 'o': + base = 8; + break; + + case 'X': + flags |= _kc_LARGE; + case 'x': + base = 16; + break; + + case 'd': + case 'i': + flags |= _kc_SIGN; + case 'u': + break; + + default: + if (str <= end) + *str = '%'; + ++str; + if (*fmt) { + if (str <= end) + *str = *fmt; + ++str; + } else { + --fmt; + } + continue; + } + if (qualifier == 'L') + num = va_arg(args, long long); + else if (qualifier == 'l') { + num = va_arg(args, unsigned long); + if (flags & _kc_SIGN) + num = (signed long) num; + } else if (qualifier == 'Z') { + num = va_arg(args, size_t); + } else if (qualifier == 'h') { + num = (unsigned short) va_arg(args, int); + if (flags & _kc_SIGN) + num = (signed short) num; + } else { + num = va_arg(args, unsigned int); + if (flags & _kc_SIGN) + num = (signed int) num; + } + str = number(str, end, num, base, + field_width, precision, flags); + } + if (str <= end) + *str = '\0'; + else if (size > 0) + /* don't write out a null byte if the buf size is zero */ + *end = '\0'; + /* the trailing null byte doesn't count towards the total + * ++str; + */ + return str-buf; +} + +int _kc_snprintf(char * buf, size_t size, const char *fmt, ...) +{ + va_list args; + int i; + + va_start(args, fmt); + i = _kc_vsnprintf(buf,size,fmt,args); + va_end(args); + return i; +} +#endif /* < 2.4.8 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,13) ) + +/**************************************/ +/* PCI DMA MAPPING */ + +#if defined(CONFIG_HIGHMEM) + +#ifndef PCI_DRAM_OFFSET +#define PCI_DRAM_OFFSET 0 +#endif + +u64 +_kc_pci_map_page(struct pci_dev *dev, struct page *page, unsigned long offset, + size_t size, int direction) +{ + return (((u64) (page - mem_map) << PAGE_SHIFT) + offset + + PCI_DRAM_OFFSET); +} + +#else /* CONFIG_HIGHMEM */ + +u64 +_kc_pci_map_page(struct pci_dev *dev, struct page *page, unsigned long offset, + size_t size, int direction) +{ + return pci_map_single(dev, (void *)page_address(page) + offset, size, + direction); +} + +#endif /* CONFIG_HIGHMEM */ + +void +_kc_pci_unmap_page(struct pci_dev *dev, u64 dma_addr, size_t size, + int direction) +{ + return pci_unmap_single(dev, dma_addr, size, direction); +} + +#endif /* 2.4.13 => 2.4.3 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,3) ) + +/**************************************/ +/* PCI DRIVER API */ + +int +_kc_pci_set_dma_mask(struct pci_dev *dev, dma_addr_t mask) +{ + if (!pci_dma_supported(dev, mask)) + return -EIO; + dev->dma_mask = mask; + return 0; +} + +int +_kc_pci_request_regions(struct pci_dev *dev, char *res_name) +{ + int i; + + for (i = 0; i < 6; i++) { + if (pci_resource_len(dev, i) == 0) + continue; + + if (pci_resource_flags(dev, i) & IORESOURCE_IO) { + if (!request_region(pci_resource_start(dev, i), pci_resource_len(dev, i), res_name)) { + pci_release_regions(dev); + return -EBUSY; + } + } else if (pci_resource_flags(dev, i) & IORESOURCE_MEM) { + if (!request_mem_region(pci_resource_start(dev, i), pci_resource_len(dev, i), res_name)) { + pci_release_regions(dev); + return -EBUSY; + } + } + } + return 0; +} + +void +_kc_pci_release_regions(struct pci_dev *dev) +{ + int i; + + for (i = 0; i < 6; i++) { + if (pci_resource_len(dev, i) == 0) + continue; + + if (pci_resource_flags(dev, i) & IORESOURCE_IO) + release_region(pci_resource_start(dev, i), pci_resource_len(dev, i)); + + else if (pci_resource_flags(dev, i) & IORESOURCE_MEM) + release_mem_region(pci_resource_start(dev, i), pci_resource_len(dev, i)); + } +} + +/**************************************/ +/* NETWORK DRIVER API */ + +struct net_device * +_kc_alloc_etherdev(int sizeof_priv) +{ + struct net_device *dev; + int alloc_size; + + alloc_size = sizeof(*dev) + sizeof_priv + IFNAMSIZ + 31; + dev = kzalloc(alloc_size, GFP_KERNEL); + if (!dev) + return NULL; + + if (sizeof_priv) + dev->priv = (void *) (((unsigned long)(dev + 1) + 31) & ~31); + dev->name[0] = '\0'; + ether_setup(dev); + + return dev; +} + +int +_kc_is_valid_ether_addr(u8 *addr) +{ + const char zaddr[6] = { 0, }; + + return !(addr[0] & 1) && memcmp(addr, zaddr, 6); +} + +#endif /* 2.4.3 => 2.4.0 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,6) ) + +int +_kc_pci_set_power_state(struct pci_dev *dev, int state) +{ + return 0; +} + +int +_kc_pci_enable_wake(struct pci_dev *pdev, u32 state, int enable) +{ + return 0; +} + +#endif /* 2.4.6 => 2.4.3 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) ) +void _kc_skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, + int off, int size) +{ + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + frag->page = page; + frag->page_offset = off; + frag->size = size; + skb_shinfo(skb)->nr_frags = i + 1; +} + +/* + * Original Copyright: + * find_next_bit.c: fallback find next bit implementation + * + * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +/** + * find_next_bit - find the next set bit in a memory region + * @addr: The address to base the search on + * @offset: The bitnumber to start searching at + * @size: The maximum size to search + */ +unsigned long find_next_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) +{ + const unsigned long *p = addr + BITOP_WORD(offset); + unsigned long result = offset & ~(BITS_PER_LONG-1); + unsigned long tmp; + + if (offset >= size) + return size; + size -= result; + offset %= BITS_PER_LONG; + if (offset) { + tmp = *(p++); + tmp &= (~0UL << offset); + if (size < BITS_PER_LONG) + goto found_first; + if (tmp) + goto found_middle; + size -= BITS_PER_LONG; + result += BITS_PER_LONG; + } + while (size & ~(BITS_PER_LONG-1)) { + if ((tmp = *(p++))) + goto found_middle; + result += BITS_PER_LONG; + size -= BITS_PER_LONG; + } + if (!size) + return result; + tmp = *p; + +found_first: + tmp &= (~0UL >> (BITS_PER_LONG - size)); + if (tmp == 0UL) /* Are any bits set? */ + return result + size; /* Nope. */ +found_middle: + return result + ffs(tmp); +} + +size_t _kc_strlcpy(char *dest, const char *src, size_t size) +{ + size_t ret = strlen(src); + + if (size) { + size_t len = (ret >= size) ? size - 1 : ret; + memcpy(dest, src, len); + dest[len] = '\0'; + } + return ret; +} + +#ifndef do_div +#if BITS_PER_LONG == 32 +uint32_t __attribute__((weak)) _kc__div64_32(uint64_t *n, uint32_t base) +{ + uint64_t rem = *n; + uint64_t b = base; + uint64_t res, d = 1; + uint32_t high = rem >> 32; + + /* Reduce the thing a bit first */ + res = 0; + if (high >= base) { + high /= base; + res = (uint64_t) high << 32; + rem -= (uint64_t) (high*base) << 32; + } + + while ((int64_t)b > 0 && b < rem) { + b = b+b; + d = d+d; + } + + do { + if (rem >= b) { + rem -= b; + res += d; + } + b >>= 1; + d >>= 1; + } while (d); + + *n = res; + return rem; +} +#endif /* BITS_PER_LONG == 32 */ +#endif /* do_div */ +#endif /* 2.6.0 => 2.4.6 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) ) +int _kc_scnprintf(char * buf, size_t size, const char *fmt, ...) +{ + va_list args; + int i; + + va_start(args, fmt); + i = vsnprintf(buf, size, fmt, args); + va_end(args); + return (i >= size) ? (size - 1) : i; +} +#endif /* < 2.6.4 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10) ) +DECLARE_BITMAP(_kcompat_node_online_map, MAX_NUMNODES) = {1}; +#endif /* < 2.6.10 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,13) ) +char *_kc_kstrdup(const char *s, unsigned int gfp) +{ + size_t len; + char *buf; + + if (!s) + return NULL; + + len = strlen(s) + 1; + buf = kmalloc(len, gfp); + if (buf) + memcpy(buf, s, len); + return buf; +} +#endif /* < 2.6.13 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14) ) +void *_kc_kzalloc(size_t size, int flags) +{ + void *ret = kmalloc(size, flags); + if (ret) + memset(ret, 0, size); + return ret; +} +#endif /* <= 2.6.13 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) ) +int _kc_skb_pad(struct sk_buff *skb, int pad) +{ + int ntail; + + /* If the skbuff is non linear tailroom is always zero.. */ + if(!skb_cloned(skb) && skb_tailroom(skb) >= pad) { + memset(skb->data+skb->len, 0, pad); + return 0; + } + + ntail = skb->data_len + pad - (skb->end - skb->tail); + if (likely(skb_cloned(skb) || ntail > 0)) { + if (pskb_expand_head(skb, 0, ntail, GFP_ATOMIC)); + goto free_skb; + } + +#ifdef MAX_SKB_FRAGS + if (skb_is_nonlinear(skb) && + !__pskb_pull_tail(skb, skb->data_len)) + goto free_skb; + +#endif + memset(skb->data + skb->len, 0, pad); + return 0; + +free_skb: + kfree_skb(skb); + return -ENOMEM; +} + +#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,4))) +int _kc_pci_save_state(struct pci_dev *pdev) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct adapter_struct *adapter = netdev_priv(netdev); + int size = PCI_CONFIG_SPACE_LEN, i; + u16 pcie_cap_offset, pcie_link_status; + +#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) ) + /* no ->dev for 2.4 kernels */ + WARN_ON(pdev->dev.driver_data == NULL); +#endif + pcie_cap_offset = pci_find_capability(pdev, PCI_CAP_ID_EXP); + if (pcie_cap_offset) { + if (!pci_read_config_word(pdev, + pcie_cap_offset + PCIE_LINK_STATUS, + &pcie_link_status)) + size = PCIE_CONFIG_SPACE_LEN; + } + pci_config_space_ich8lan(); +#ifdef HAVE_PCI_ERS + if (adapter->config_space == NULL) +#else + WARN_ON(adapter->config_space != NULL); +#endif + adapter->config_space = kmalloc(size, GFP_KERNEL); + if (!adapter->config_space) { + printk(KERN_ERR "Out of memory in pci_save_state\n"); + return -ENOMEM; + } + for (i = 0; i < (size / 4); i++) + pci_read_config_dword(pdev, i * 4, &adapter->config_space[i]); + return 0; +} + +void _kc_pci_restore_state(struct pci_dev *pdev) +{ + struct net_device *netdev = pci_get_drvdata(pdev); + struct adapter_struct *adapter = netdev_priv(netdev); + int size = PCI_CONFIG_SPACE_LEN, i; + u16 pcie_cap_offset; + u16 pcie_link_status; + + if (adapter->config_space != NULL) { + pcie_cap_offset = pci_find_capability(pdev, PCI_CAP_ID_EXP); + if (pcie_cap_offset && + !pci_read_config_word(pdev, + pcie_cap_offset + PCIE_LINK_STATUS, + &pcie_link_status)) + size = PCIE_CONFIG_SPACE_LEN; + + pci_config_space_ich8lan(); + for (i = 0; i < (size / 4); i++) + pci_write_config_dword(pdev, i * 4, adapter->config_space[i]); +#ifndef HAVE_PCI_ERS + kfree(adapter->config_space); + adapter->config_space = NULL; +#endif + } +} +#endif /* !(RHEL_RELEASE_CODE >= RHEL 5.4) */ + +#ifdef HAVE_PCI_ERS +void _kc_free_netdev(struct net_device *netdev) +{ + struct adapter_struct *adapter = netdev_priv(netdev); + + if (adapter->config_space != NULL) + kfree(adapter->config_space); +#ifdef CONFIG_SYSFS + if (netdev->reg_state == NETREG_UNINITIALIZED) { + kfree((char *)netdev - netdev->padded); + } else { + BUG_ON(netdev->reg_state != NETREG_UNREGISTERED); + netdev->reg_state = NETREG_RELEASED; + class_device_put(&netdev->class_dev); + } +#else + kfree((char *)netdev - netdev->padded); +#endif +} +#endif + +void *_kc_kmemdup(const void *src, size_t len, unsigned gfp) +{ + void *p; + + p = kzalloc(len, gfp); + if (p) + memcpy(p, src, len); + return p; +} +#endif /* <= 2.6.19 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) ) +struct pci_dev *_kc_netdev_to_pdev(struct net_device *netdev) +{ + return ((struct adapter_struct *)netdev_priv(netdev))->pdev; +} +#endif /* < 2.6.21 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) ) +/* hexdump code taken from lib/hexdump.c */ +static void _kc_hex_dump_to_buffer(const void *buf, size_t len, int rowsize, + int groupsize, unsigned char *linebuf, + size_t linebuflen, bool ascii) +{ + const u8 *ptr = buf; + u8 ch; + int j, lx = 0; + int ascii_column; + + if (rowsize != 16 && rowsize != 32) + rowsize = 16; + + if (!len) + goto nil; + if (len > rowsize) /* limit to one line at a time */ + len = rowsize; + if ((len % groupsize) != 0) /* no mixed size output */ + groupsize = 1; + + switch (groupsize) { + case 8: { + const u64 *ptr8 = buf; + int ngroups = len / groupsize; + + for (j = 0; j < ngroups; j++) + lx += scnprintf((char *)(linebuf + lx), linebuflen - lx, + "%s%16.16llx", j ? " " : "", + (unsigned long long)*(ptr8 + j)); + ascii_column = 17 * ngroups + 2; + break; + } + + case 4: { + const u32 *ptr4 = buf; + int ngroups = len / groupsize; + + for (j = 0; j < ngroups; j++) + lx += scnprintf((char *)(linebuf + lx), linebuflen - lx, + "%s%8.8x", j ? " " : "", *(ptr4 + j)); + ascii_column = 9 * ngroups + 2; + break; + } + + case 2: { + const u16 *ptr2 = buf; + int ngroups = len / groupsize; + + for (j = 0; j < ngroups; j++) + lx += scnprintf((char *)(linebuf + lx), linebuflen - lx, + "%s%4.4x", j ? " " : "", *(ptr2 + j)); + ascii_column = 5 * ngroups + 2; + break; + } + + default: + for (j = 0; (j < len) && (lx + 3) <= linebuflen; j++) { + ch = ptr[j]; + linebuf[lx++] = hex_asc(ch >> 4); + linebuf[lx++] = hex_asc(ch & 0x0f); + linebuf[lx++] = ' '; + } + if (j) + lx--; + + ascii_column = 3 * rowsize + 2; + break; + } + if (!ascii) + goto nil; + + while (lx < (linebuflen - 1) && lx < (ascii_column - 1)) + linebuf[lx++] = ' '; + for (j = 0; (j < len) && (lx + 2) < linebuflen; j++) + linebuf[lx++] = (isascii(ptr[j]) && isprint(ptr[j])) ? ptr[j] + : '.'; +nil: + linebuf[lx++] = '\0'; +} + +void _kc_print_hex_dump(const char *level, + const char *prefix_str, int prefix_type, + int rowsize, int groupsize, + const void *buf, size_t len, bool ascii) +{ + const u8 *ptr = buf; + int i, linelen, remaining = len; + unsigned char linebuf[200]; + + if (rowsize != 16 && rowsize != 32) + rowsize = 16; + + for (i = 0; i < len; i += rowsize) { + linelen = min(remaining, rowsize); + remaining -= rowsize; + _kc_hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize, + linebuf, sizeof(linebuf), ascii); + + switch (prefix_type) { + case DUMP_PREFIX_ADDRESS: + printk("%s%s%*p: %s\n", level, prefix_str, + (int)(2 * sizeof(void *)), ptr + i, linebuf); + break; + case DUMP_PREFIX_OFFSET: + printk("%s%s%.8x: %s\n", level, prefix_str, i, linebuf); + break; + default: + printk("%s%s%s\n", level, prefix_str, linebuf); + break; + } + } +} + +#ifdef HAVE_I2C_SUPPORT +struct i2c_client * +_kc_i2c_new_device(struct i2c_adapter *adap, struct i2c_board_info const *info) +{ + struct i2c_client *client; + int status; + + client = kzalloc(sizeof *client, GFP_KERNEL); + if (!client) + return NULL; + + client->adapter = adap; + + client->dev.platform_data = info->platform_data; + + client->flags = info->flags; + client->addr = info->addr; + + strlcpy(client->name, info->type, sizeof(client->name)); + + /* Check for address business */ + status = i2c_check_addr(adap, client->addr); + if (status) + goto out_err; + + client->dev.parent = &client->adapter->dev; + client->dev.bus = &i2c_bus_type; + + status = i2c_attach_client(client); + if (status) + goto out_err; + + dev_dbg(&adap->dev, "client [%s] registered with bus id %s\n", + client->name, dev_name(&client->dev)); + + return client; + +out_err: + dev_err(&adap->dev, "Failed to register i2c client %s at 0x%02x " + "(%d)\n", client->name, client->addr, status); + kfree(client); + return NULL; +} +#endif /* HAVE_I2C_SUPPORT */ +#endif /* < 2.6.22 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) ) +#ifdef NAPI +struct net_device *napi_to_poll_dev(const struct napi_struct *napi) +{ + struct adapter_q_vector *q_vector = container_of(napi, + struct adapter_q_vector, + napi); + return &q_vector->poll_dev; +} + +int __kc_adapter_clean(struct net_device *netdev, int *budget) +{ + int work_done; + int work_to_do = min(*budget, netdev->quota); + /* kcompat.h netif_napi_add puts napi struct in "fake netdev->priv" */ + struct napi_struct *napi = netdev->priv; + work_done = napi->poll(napi, work_to_do); + *budget -= work_done; + netdev->quota -= work_done; + return (work_done >= work_to_do) ? 1 : 0; +} +#endif /* NAPI */ +#endif /* <= 2.6.24 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) ) +void _kc_pci_disable_link_state(struct pci_dev *pdev, int state) +{ + struct pci_dev *parent = pdev->bus->self; + u16 link_state; + int pos; + + if (!parent) + return; + + pos = pci_find_capability(parent, PCI_CAP_ID_EXP); + if (pos) { + pci_read_config_word(parent, pos + PCI_EXP_LNKCTL, &link_state); + link_state &= ~state; + pci_write_config_word(parent, pos + PCI_EXP_LNKCTL, link_state); + } +} +#endif /* < 2.6.26 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) ) +#ifdef HAVE_TX_MQ +void _kc_netif_tx_stop_all_queues(struct net_device *netdev) +{ + struct adapter_struct *adapter = netdev_priv(netdev); + int i; + + netif_stop_queue(netdev); + if (netif_is_multiqueue(netdev)) + for (i = 0; i < adapter->num_tx_queues; i++) + netif_stop_subqueue(netdev, i); +} +void _kc_netif_tx_wake_all_queues(struct net_device *netdev) +{ + struct adapter_struct *adapter = netdev_priv(netdev); + int i; + + netif_wake_queue(netdev); + if (netif_is_multiqueue(netdev)) + for (i = 0; i < adapter->num_tx_queues; i++) + netif_wake_subqueue(netdev, i); +} +void _kc_netif_tx_start_all_queues(struct net_device *netdev) +{ + struct adapter_struct *adapter = netdev_priv(netdev); + int i; + + netif_start_queue(netdev); + if (netif_is_multiqueue(netdev)) + for (i = 0; i < adapter->num_tx_queues; i++) + netif_start_subqueue(netdev, i); +} +#endif /* HAVE_TX_MQ */ + +#ifndef __WARN_printf +void __kc_warn_slowpath(const char *file, int line, const char *fmt, ...) +{ + va_list args; + + printk(KERN_WARNING "------------[ cut here ]------------\n"); + printk(KERN_WARNING "WARNING: at %s:%d %s()\n", file, line); + va_start(args, fmt); + vprintk(fmt, args); + va_end(args); + + dump_stack(); +} +#endif /* __WARN_printf */ +#endif /* < 2.6.27 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) ) + +int +_kc_pci_prepare_to_sleep(struct pci_dev *dev) +{ + pci_power_t target_state; + int error; + + target_state = pci_choose_state(dev, PMSG_SUSPEND); + + pci_enable_wake(dev, target_state, true); + + error = pci_set_power_state(dev, target_state); + + if (error) + pci_enable_wake(dev, target_state, false); + + return error; +} + +int +_kc_pci_wake_from_d3(struct pci_dev *dev, bool enable) +{ + int err; + + err = pci_enable_wake(dev, PCI_D3cold, enable); + if (err) + goto out; + + err = pci_enable_wake(dev, PCI_D3hot, enable); + +out: + return err; +} +#endif /* < 2.6.28 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,29) ) +static void __kc_pci_set_master(struct pci_dev *pdev, bool enable) +{ + u16 old_cmd, cmd; + + pci_read_config_word(pdev, PCI_COMMAND, &old_cmd); + if (enable) + cmd = old_cmd | PCI_COMMAND_MASTER; + else + cmd = old_cmd & ~PCI_COMMAND_MASTER; + if (cmd != old_cmd) { + dev_dbg(pci_dev_to_dev(pdev), "%s bus mastering\n", + enable ? "enabling" : "disabling"); + pci_write_config_word(pdev, PCI_COMMAND, cmd); + } +#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,7) ) + pdev->is_busmaster = enable; +#endif +} + +void _kc_pci_clear_master(struct pci_dev *dev) +{ + __kc_pci_set_master(dev, false); +} +#endif /* < 2.6.29 */ + +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,34) ) +#if (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(6,0)) +int _kc_pci_num_vf(struct pci_dev *dev) +{ + int num_vf = 0; +#ifdef CONFIG_PCI_IOV + struct pci_dev *vfdev; + + /* loop through all ethernet devices starting at PF dev */ + vfdev = pci_get_class(PCI_CLASS_NETWORK_ETHERNET << 8, NULL); + while (vfdev) { + if (vfdev->is_virtfn && vfdev->physfn == dev) + num_vf++; + + vfdev = pci_get_class(PCI_CLASS_NETWORK_ETHERNET << 8, vfdev); + } + +#endif + return num_vf; +} +#endif /* RHEL_RELEASE_CODE */ +#endif /* < 2.6.34 */ + +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35) ) +#ifdef HAVE_TX_MQ +#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0))) +#ifndef CONFIG_NETDEVICES_MULTIQUEUE +void _kc_netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) +{ + unsigned int real_num = dev->real_num_tx_queues; + struct Qdisc *qdisc; + int i; + + if (unlikely(txq > dev->num_tx_queues)) + ; + else if (txq > real_num) + dev->real_num_tx_queues = txq; + else if ( txq < real_num) { + dev->real_num_tx_queues = txq; + for (i = txq; i < dev->num_tx_queues; i++) { + qdisc = netdev_get_tx_queue(dev, i)->qdisc; + if (qdisc) { + spin_lock_bh(qdisc_lock(qdisc)); + qdisc_reset(qdisc); + spin_unlock_bh(qdisc_lock(qdisc)); + } + } + } +} +#endif /* CONFIG_NETDEVICES_MULTIQUEUE */ +#endif /* !(RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0)) */ +#endif /* HAVE_TX_MQ */ + +ssize_t _kc_simple_write_to_buffer(void *to, size_t available, loff_t *ppos, + const void __user *from, size_t count) +{ + loff_t pos = *ppos; + size_t res; + + if (pos < 0) + return -EINVAL; + if (pos >= available || !count) + return 0; + if (count > available - pos) + count = available - pos; + res = copy_from_user(to + pos, from, count); + if (res == count) + return -EFAULT; + count -= res; + *ppos = pos + count; + return count; +} + +#endif /* < 2.6.35 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) ) +static const u32 _kc_flags_dup_features = + (ETH_FLAG_LRO | ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH); + +u32 _kc_ethtool_op_get_flags(struct net_device *dev) +{ + return dev->features & _kc_flags_dup_features; +} + +int _kc_ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported) +{ + if (data & ~supported) + return -EINVAL; + + dev->features = ((dev->features & ~_kc_flags_dup_features) | + (data & _kc_flags_dup_features)); + return 0; +} +#endif /* < 2.6.36 */ + +/******************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39) ) +#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(6,0))) + + + +#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(6,0)) */ +#endif /* < 2.6.39 */ + +/******************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0) ) +void _kc_skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, + int off, int size, unsigned int truesize) +{ + skb_fill_page_desc(skb, i, page, off, size); + skb->len += size; + skb->data_len += size; + skb->truesize += truesize; +} + +int _kc_simple_open(struct inode *inode, struct file *file) +{ + if (inode->i_private) + file->private_data = inode->i_private; + + return 0; +} + +#endif /* < 3.4.0 */ + +/******************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,7,0) ) +#if !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0)) && \ + !(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,5)) +static inline int __kc_pcie_cap_version(struct pci_dev *dev) +{ + int pos; + u16 reg16; + + pos = pci_find_capability(dev, PCI_CAP_ID_EXP); + if (!pos) + return 0; + pci_read_config_word(dev, pos + PCI_EXP_FLAGS, ®16); + return reg16 & PCI_EXP_FLAGS_VERS; +} + +static inline bool __kc_pcie_cap_has_devctl(const struct pci_dev __always_unused *dev) +{ + return true; +} + +static inline bool __kc_pcie_cap_has_lnkctl(struct pci_dev *dev) +{ + int type = pci_pcie_type(dev); + + return __kc_pcie_cap_version(dev) > 1 || + type == PCI_EXP_TYPE_ROOT_PORT || + type == PCI_EXP_TYPE_ENDPOINT || + type == PCI_EXP_TYPE_LEG_END; +} + +static inline bool __kc_pcie_cap_has_sltctl(struct pci_dev *dev) +{ + int type = pci_pcie_type(dev); + int pos; + u16 pcie_flags_reg; + + pos = pci_find_capability(dev, PCI_CAP_ID_EXP); + if (!pos) + return 0; + pci_read_config_word(dev, pos + PCI_EXP_FLAGS, &pcie_flags_reg); + + return __kc_pcie_cap_version(dev) > 1 || + type == PCI_EXP_TYPE_ROOT_PORT || + (type == PCI_EXP_TYPE_DOWNSTREAM && + pcie_flags_reg & PCI_EXP_FLAGS_SLOT); +} + +static inline bool __kc_pcie_cap_has_rtctl(struct pci_dev *dev) +{ + int type = pci_pcie_type(dev); + + return __kc_pcie_cap_version(dev) > 1 || + type == PCI_EXP_TYPE_ROOT_PORT || + type == PCI_EXP_TYPE_RC_EC; +} + +static bool __kc_pcie_capability_reg_implemented(struct pci_dev *dev, int pos) +{ + if (!pci_is_pcie(dev)) + return false; + + switch (pos) { + case PCI_EXP_FLAGS_TYPE: + return true; + case PCI_EXP_DEVCAP: + case PCI_EXP_DEVCTL: + case PCI_EXP_DEVSTA: + return __kc_pcie_cap_has_devctl(dev); + case PCI_EXP_LNKCAP: + case PCI_EXP_LNKCTL: + case PCI_EXP_LNKSTA: + return __kc_pcie_cap_has_lnkctl(dev); + case PCI_EXP_SLTCAP: + case PCI_EXP_SLTCTL: + case PCI_EXP_SLTSTA: + return __kc_pcie_cap_has_sltctl(dev); + case PCI_EXP_RTCTL: + case PCI_EXP_RTCAP: + case PCI_EXP_RTSTA: + return __kc_pcie_cap_has_rtctl(dev); + case PCI_EXP_DEVCAP2: + case PCI_EXP_DEVCTL2: + case PCI_EXP_LNKCAP2: + case PCI_EXP_LNKCTL2: + case PCI_EXP_LNKSTA2: + return __kc_pcie_cap_version(dev) > 1; + default: + return false; + } +} + +/* + * Note that these accessor functions are only for the "PCI Express + * Capability" (see PCIe spec r3.0, sec 7.8). They do not apply to the + * other "PCI Express Extended Capabilities" (AER, VC, ACS, MFVC, etc.) + */ +int __kc_pcie_capability_read_word(struct pci_dev *dev, int pos, u16 *val) +{ + int ret; + + *val = 0; + if (pos & 1) + return -EINVAL; + + if (__kc_pcie_capability_reg_implemented(dev, pos)) { + ret = pci_read_config_word(dev, pci_pcie_cap(dev) + pos, val); + /* + * Reset *val to 0 if pci_read_config_word() fails, it may + * have been written as 0xFFFF if hardware error happens + * during pci_read_config_word(). + */ + if (ret) + *val = 0; + return ret; + } + + /* + * For Functions that do not implement the Slot Capabilities, + * Slot Status, and Slot Control registers, these spaces must + * be hardwired to 0b, with the exception of the Presence Detect + * State bit in the Slot Status register of Downstream Ports, + * which must be hardwired to 1b. (PCIe Base Spec 3.0, sec 7.8) + */ + if (pci_is_pcie(dev) && pos == PCI_EXP_SLTSTA && + pci_pcie_type(dev) == PCI_EXP_TYPE_DOWNSTREAM) { + *val = PCI_EXP_SLTSTA_PDS; + } + + return 0; +} + +int __kc_pcie_capability_write_word(struct pci_dev *dev, int pos, u16 val) +{ + if (pos & 1) + return -EINVAL; + + if (!__kc_pcie_capability_reg_implemented(dev, pos)) + return 0; + + return pci_write_config_word(dev, pci_pcie_cap(dev) + pos, val); +} + +int __kc_pcie_capability_clear_and_set_word(struct pci_dev *dev, int pos, + u16 clear, u16 set) +{ + int ret; + u16 val; + + ret = __kc_pcie_capability_read_word(dev, pos, &val); + if (!ret) { + val &= ~clear; + val |= set; + ret = __kc_pcie_capability_write_word(dev, pos, val); + } + + return ret; +} +#endif /* !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0)) && \ + !(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,5)) */ +#endif /* < 3.7.0 */ + +/******************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,9,0) ) +#endif /* 3.9.0 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) ) +#ifdef CONFIG_PCI_IOV +int __kc_pci_vfs_assigned(struct pci_dev *dev) +{ + unsigned int vfs_assigned = 0; +#ifdef HAVE_PCI_DEV_FLAGS_ASSIGNED + int pos; + struct pci_dev *vfdev; + unsigned short dev_id; + + /* only search if we are a PF */ + if (!dev->is_physfn) + return 0; + + /* find SR-IOV capability */ + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV); + if (!pos) + return 0; + + /* + * determine the device ID for the VFs, the vendor ID will be the + * same as the PF so there is no need to check for that one + */ + pci_read_config_word(dev, pos + PCI_SRIOV_VF_DID, &dev_id); + + /* loop through all the VFs to see if we own any that are assigned */ + vfdev = pci_get_device(dev->vendor, dev_id, NULL); + while (vfdev) { + /* + * It is considered assigned if it is a virtual function with + * our dev as the physical function and the assigned bit is set + */ + if (vfdev->is_virtfn && (vfdev->physfn == dev) && + (vfdev->dev_flags & PCI_DEV_FLAGS_ASSIGNED)) + vfs_assigned++; + + vfdev = pci_get_device(dev->vendor, dev_id, vfdev); + } + +#endif /* HAVE_PCI_DEV_FLAGS_ASSIGNED */ + return vfs_assigned; +} + +#endif /* CONFIG_PCI_IOV */ +#endif /* 3.10.0 */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h new file mode 100644 index 00000000..e2cf71e0 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat.h @@ -0,0 +1,3918 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _KCOMPAT_H_ +#define _KCOMPAT_H_ + +#ifndef LINUX_VERSION_CODE +#include +#else +#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c)) +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* NAPI enable/disable flags here */ +#define NAPI + +#define adapter_struct igb_adapter +#define adapter_q_vector igb_q_vector +#define NAPI + +/* and finally set defines so that the code sees the changes */ +#ifdef NAPI +#else +#endif /* NAPI */ + +/* packet split disable/enable */ +#ifdef DISABLE_PACKET_SPLIT +#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT +#define CONFIG_IGB_DISABLE_PACKET_SPLIT +#endif +#endif /* DISABLE_PACKET_SPLIT */ + +/* MSI compatibility code for all kernels and drivers */ +#ifdef DISABLE_PCI_MSI +#undef CONFIG_PCI_MSI +#endif +#ifndef CONFIG_PCI_MSI +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,8) ) +struct msix_entry { + u16 vector; /* kernel uses to write allocated vector */ + u16 entry; /* driver uses to specify entry, OS writes */ +}; +#endif +#undef pci_enable_msi +#define pci_enable_msi(a) -ENOTSUPP +#undef pci_disable_msi +#define pci_disable_msi(a) do {} while (0) +#undef pci_enable_msix +#define pci_enable_msix(a, b, c) -ENOTSUPP +#undef pci_disable_msix +#define pci_disable_msix(a) do {} while (0) +#define msi_remove_pci_irq_vectors(a) do {} while (0) +#endif /* CONFIG_PCI_MSI */ +#ifdef DISABLE_PM +#undef CONFIG_PM +#endif + +#ifdef DISABLE_NET_POLL_CONTROLLER +#undef CONFIG_NET_POLL_CONTROLLER +#endif + +#ifndef PMSG_SUSPEND +#define PMSG_SUSPEND 3 +#endif + +/* generic boolean compatibility */ +#undef TRUE +#undef FALSE +#define TRUE true +#define FALSE false +#ifdef GCC_VERSION +#if ( GCC_VERSION < 3000 ) +#define _Bool char +#endif +#else +#define _Bool char +#endif + +/* kernels less than 2.4.14 don't have this */ +#ifndef ETH_P_8021Q +#define ETH_P_8021Q 0x8100 +#endif + +#ifndef module_param +#define module_param(v,t,p) MODULE_PARM(v, "i"); +#endif + +#ifndef DMA_64BIT_MASK +#define DMA_64BIT_MASK 0xffffffffffffffffULL +#endif + +#ifndef DMA_32BIT_MASK +#define DMA_32BIT_MASK 0x00000000ffffffffULL +#endif + +#ifndef PCI_CAP_ID_EXP +#define PCI_CAP_ID_EXP 0x10 +#endif + +#ifndef PCIE_LINK_STATE_L0S +#define PCIE_LINK_STATE_L0S 1 +#endif +#ifndef PCIE_LINK_STATE_L1 +#define PCIE_LINK_STATE_L1 2 +#endif + +#ifndef mmiowb +#ifdef CONFIG_IA64 +#define mmiowb() asm volatile ("mf.a" ::: "memory") +#else +#define mmiowb() +#endif +#endif + +#ifndef SET_NETDEV_DEV +#define SET_NETDEV_DEV(net, pdev) +#endif + +#if !defined(HAVE_FREE_NETDEV) && ( LINUX_VERSION_CODE < KERNEL_VERSION(3,1,0) ) +#define free_netdev(x) kfree(x) +#endif + +#ifdef HAVE_POLL_CONTROLLER +#define CONFIG_NET_POLL_CONTROLLER +#endif + +#ifndef SKB_DATAREF_SHIFT +/* if we do not have the infrastructure to detect if skb_header is cloned + just return false in all cases */ +#define skb_header_cloned(x) 0 +#endif + +#ifndef NETIF_F_GSO +#define gso_size tso_size +#define gso_segs tso_segs +#endif + +#ifndef NETIF_F_GRO +#define vlan_gro_receive(_napi, _vlgrp, _vlan, _skb) \ + vlan_hwaccel_receive_skb(_skb, _vlgrp, _vlan) +#define napi_gro_receive(_napi, _skb) netif_receive_skb(_skb) +#endif + +#ifndef NETIF_F_SCTP_CSUM +#define NETIF_F_SCTP_CSUM 0 +#endif + +#ifndef NETIF_F_LRO +#define NETIF_F_LRO (1 << 15) +#endif + +#ifndef NETIF_F_NTUPLE +#define NETIF_F_NTUPLE (1 << 27) +#endif + +#ifndef IPPROTO_SCTP +#define IPPROTO_SCTP 132 +#endif + +#ifndef CHECKSUM_PARTIAL +#define CHECKSUM_PARTIAL CHECKSUM_HW +#define CHECKSUM_COMPLETE CHECKSUM_HW +#endif + +#ifndef __read_mostly +#define __read_mostly +#endif + +#ifndef MII_RESV1 +#define MII_RESV1 0x17 /* Reserved... */ +#endif + +#ifndef unlikely +#define unlikely(_x) _x +#define likely(_x) _x +#endif + +#ifndef WARN_ON +#define WARN_ON(x) +#endif + +#ifndef PCI_DEVICE +#define PCI_DEVICE(vend,dev) \ + .vendor = (vend), .device = (dev), \ + .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID +#endif + +#ifndef node_online +#define node_online(node) ((node) == 0) +#endif + +#ifndef num_online_cpus +#define num_online_cpus() smp_num_cpus +#endif + +#ifndef cpu_online +#define cpu_online(cpuid) test_bit((cpuid), &cpu_online_map) +#endif + +#ifndef _LINUX_RANDOM_H +#include +#endif + +#ifndef DECLARE_BITMAP +#ifndef BITS_TO_LONGS +#define BITS_TO_LONGS(bits) (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG) +#endif +#define DECLARE_BITMAP(name,bits) long name[BITS_TO_LONGS(bits)] +#endif + +#ifndef VLAN_HLEN +#define VLAN_HLEN 4 +#endif + +#ifndef VLAN_ETH_HLEN +#define VLAN_ETH_HLEN 18 +#endif + +#ifndef VLAN_ETH_FRAME_LEN +#define VLAN_ETH_FRAME_LEN 1518 +#endif + +#if !defined(IXGBE_DCA) && !defined(IGB_DCA) +#define dca_get_tag(b) 0 +#define dca_add_requester(a) -1 +#define dca_remove_requester(b) do { } while(0) +#define DCA_PROVIDER_ADD 0x0001 +#define DCA_PROVIDER_REMOVE 0x0002 +#endif + +#ifndef DCA_GET_TAG_TWO_ARGS +#define dca3_get_tag(a,b) dca_get_tag(b) +#endif + +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS +#if defined(__i386__) || defined(__x86_64__) +#define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS +#endif +#endif + +/* taken from 2.6.24 definition in linux/kernel.h */ +#ifndef IS_ALIGNED +#define IS_ALIGNED(x,a) (((x) % ((typeof(x))(a))) == 0) +#endif + +#ifdef IS_ENABLED +#undef IS_ENABLED +#undef __ARG_PLACEHOLDER_1 +#undef config_enabled +#undef _config_enabled +#undef __config_enabled +#undef ___config_enabled +#endif + +#define __ARG_PLACEHOLDER_1 0, +#define config_enabled(cfg) _config_enabled(cfg) +#define _config_enabled(value) __config_enabled(__ARG_PLACEHOLDER_##value) +#define __config_enabled(arg1_or_junk) ___config_enabled(arg1_or_junk 1, 0) +#define ___config_enabled(__ignored, val, ...) val + +#define IS_ENABLED(option) \ + (config_enabled(option) || config_enabled(option##_MODULE)) + +#if !defined(NETIF_F_HW_VLAN_TX) && !defined(NETIF_F_HW_VLAN_CTAG_TX) +struct _kc_vlan_ethhdr { + unsigned char h_dest[ETH_ALEN]; + unsigned char h_source[ETH_ALEN]; + __be16 h_vlan_proto; + __be16 h_vlan_TCI; + __be16 h_vlan_encapsulated_proto; +}; +#define vlan_ethhdr _kc_vlan_ethhdr +struct _kc_vlan_hdr { + __be16 h_vlan_TCI; + __be16 h_vlan_encapsulated_proto; +}; +#define vlan_hdr _kc_vlan_hdr +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) ) +#define vlan_tx_tag_present(_skb) 0 +#define vlan_tx_tag_get(_skb) 0 +#endif +#endif /* NETIF_F_HW_VLAN_TX && NETIF_F_HW_VLAN_CTAG_TX */ + +#ifndef VLAN_PRIO_SHIFT +#define VLAN_PRIO_SHIFT 13 +#endif + + +#ifndef __GFP_COLD +#define __GFP_COLD 0 +#endif + +#ifndef __GFP_COMP +#define __GFP_COMP 0 +#endif + +/*****************************************************************************/ +/* Installations with ethtool version without eeprom, adapter id, or statistics + * support */ + +#ifndef ETH_GSTRING_LEN +#define ETH_GSTRING_LEN 32 +#endif + +#ifndef ETHTOOL_GSTATS +#define ETHTOOL_GSTATS 0x1d +#undef ethtool_drvinfo +#define ethtool_drvinfo k_ethtool_drvinfo +struct k_ethtool_drvinfo { + u32 cmd; + char driver[32]; + char version[32]; + char fw_version[32]; + char bus_info[32]; + char reserved1[32]; + char reserved2[16]; + u32 n_stats; + u32 testinfo_len; + u32 eedump_len; + u32 regdump_len; +}; + +struct ethtool_stats { + u32 cmd; + u32 n_stats; + u64 data[0]; +}; +#endif /* ETHTOOL_GSTATS */ + +#ifndef ETHTOOL_PHYS_ID +#define ETHTOOL_PHYS_ID 0x1c +#endif /* ETHTOOL_PHYS_ID */ + +#ifndef ETHTOOL_GSTRINGS +#define ETHTOOL_GSTRINGS 0x1b +enum ethtool_stringset { + ETH_SS_TEST = 0, + ETH_SS_STATS, +}; +struct ethtool_gstrings { + u32 cmd; /* ETHTOOL_GSTRINGS */ + u32 string_set; /* string set id e.c. ETH_SS_TEST, etc*/ + u32 len; /* number of strings in the string set */ + u8 data[0]; +}; +#endif /* ETHTOOL_GSTRINGS */ + +#ifndef ETHTOOL_TEST +#define ETHTOOL_TEST 0x1a +enum ethtool_test_flags { + ETH_TEST_FL_OFFLINE = (1 << 0), + ETH_TEST_FL_FAILED = (1 << 1), +}; +struct ethtool_test { + u32 cmd; + u32 flags; + u32 reserved; + u32 len; + u64 data[0]; +}; +#endif /* ETHTOOL_TEST */ + +#ifndef ETHTOOL_GEEPROM +#define ETHTOOL_GEEPROM 0xb +#undef ETHTOOL_GREGS +struct ethtool_eeprom { + u32 cmd; + u32 magic; + u32 offset; + u32 len; + u8 data[0]; +}; + +struct ethtool_value { + u32 cmd; + u32 data; +}; +#endif /* ETHTOOL_GEEPROM */ + +#ifndef ETHTOOL_GLINK +#define ETHTOOL_GLINK 0xa +#endif /* ETHTOOL_GLINK */ + +#ifndef ETHTOOL_GWOL +#define ETHTOOL_GWOL 0x5 +#define ETHTOOL_SWOL 0x6 +#define SOPASS_MAX 6 +struct ethtool_wolinfo { + u32 cmd; + u32 supported; + u32 wolopts; + u8 sopass[SOPASS_MAX]; /* SecureOn(tm) password */ +}; +#endif /* ETHTOOL_GWOL */ + +#ifndef ETHTOOL_GREGS +#define ETHTOOL_GREGS 0x00000004 /* Get NIC registers */ +#define ethtool_regs _kc_ethtool_regs +/* for passing big chunks of data */ +struct _kc_ethtool_regs { + u32 cmd; + u32 version; /* driver-specific, indicates different chips/revs */ + u32 len; /* bytes */ + u8 data[0]; +}; +#endif /* ETHTOOL_GREGS */ + +#ifndef ETHTOOL_GMSGLVL +#define ETHTOOL_GMSGLVL 0x00000007 /* Get driver message level */ +#endif +#ifndef ETHTOOL_SMSGLVL +#define ETHTOOL_SMSGLVL 0x00000008 /* Set driver msg level, priv. */ +#endif +#ifndef ETHTOOL_NWAY_RST +#define ETHTOOL_NWAY_RST 0x00000009 /* Restart autonegotiation, priv */ +#endif +#ifndef ETHTOOL_GLINK +#define ETHTOOL_GLINK 0x0000000a /* Get link status */ +#endif +#ifndef ETHTOOL_GEEPROM +#define ETHTOOL_GEEPROM 0x0000000b /* Get EEPROM data */ +#endif +#ifndef ETHTOOL_SEEPROM +#define ETHTOOL_SEEPROM 0x0000000c /* Set EEPROM data */ +#endif +#ifndef ETHTOOL_GCOALESCE +#define ETHTOOL_GCOALESCE 0x0000000e /* Get coalesce config */ +/* for configuring coalescing parameters of chip */ +#define ethtool_coalesce _kc_ethtool_coalesce +struct _kc_ethtool_coalesce { + u32 cmd; /* ETHTOOL_{G,S}COALESCE */ + + /* How many usecs to delay an RX interrupt after + * a packet arrives. If 0, only rx_max_coalesced_frames + * is used. + */ + u32 rx_coalesce_usecs; + + /* How many packets to delay an RX interrupt after + * a packet arrives. If 0, only rx_coalesce_usecs is + * used. It is illegal to set both usecs and max frames + * to zero as this would cause RX interrupts to never be + * generated. + */ + u32 rx_max_coalesced_frames; + + /* Same as above two parameters, except that these values + * apply while an IRQ is being serviced by the host. Not + * all cards support this feature and the values are ignored + * in that case. + */ + u32 rx_coalesce_usecs_irq; + u32 rx_max_coalesced_frames_irq; + + /* How many usecs to delay a TX interrupt after + * a packet is sent. If 0, only tx_max_coalesced_frames + * is used. + */ + u32 tx_coalesce_usecs; + + /* How many packets to delay a TX interrupt after + * a packet is sent. If 0, only tx_coalesce_usecs is + * used. It is illegal to set both usecs and max frames + * to zero as this would cause TX interrupts to never be + * generated. + */ + u32 tx_max_coalesced_frames; + + /* Same as above two parameters, except that these values + * apply while an IRQ is being serviced by the host. Not + * all cards support this feature and the values are ignored + * in that case. + */ + u32 tx_coalesce_usecs_irq; + u32 tx_max_coalesced_frames_irq; + + /* How many usecs to delay in-memory statistics + * block updates. Some drivers do not have an in-memory + * statistic block, and in such cases this value is ignored. + * This value must not be zero. + */ + u32 stats_block_coalesce_usecs; + + /* Adaptive RX/TX coalescing is an algorithm implemented by + * some drivers to improve latency under low packet rates and + * improve throughput under high packet rates. Some drivers + * only implement one of RX or TX adaptive coalescing. Anything + * not implemented by the driver causes these values to be + * silently ignored. + */ + u32 use_adaptive_rx_coalesce; + u32 use_adaptive_tx_coalesce; + + /* When the packet rate (measured in packets per second) + * is below pkt_rate_low, the {rx,tx}_*_low parameters are + * used. + */ + u32 pkt_rate_low; + u32 rx_coalesce_usecs_low; + u32 rx_max_coalesced_frames_low; + u32 tx_coalesce_usecs_low; + u32 tx_max_coalesced_frames_low; + + /* When the packet rate is below pkt_rate_high but above + * pkt_rate_low (both measured in packets per second) the + * normal {rx,tx}_* coalescing parameters are used. + */ + + /* When the packet rate is (measured in packets per second) + * is above pkt_rate_high, the {rx,tx}_*_high parameters are + * used. + */ + u32 pkt_rate_high; + u32 rx_coalesce_usecs_high; + u32 rx_max_coalesced_frames_high; + u32 tx_coalesce_usecs_high; + u32 tx_max_coalesced_frames_high; + + /* How often to do adaptive coalescing packet rate sampling, + * measured in seconds. Must not be zero. + */ + u32 rate_sample_interval; +}; +#endif /* ETHTOOL_GCOALESCE */ + +#ifndef ETHTOOL_SCOALESCE +#define ETHTOOL_SCOALESCE 0x0000000f /* Set coalesce config. */ +#endif +#ifndef ETHTOOL_GRINGPARAM +#define ETHTOOL_GRINGPARAM 0x00000010 /* Get ring parameters */ +/* for configuring RX/TX ring parameters */ +#define ethtool_ringparam _kc_ethtool_ringparam +struct _kc_ethtool_ringparam { + u32 cmd; /* ETHTOOL_{G,S}RINGPARAM */ + + /* Read only attributes. These indicate the maximum number + * of pending RX/TX ring entries the driver will allow the + * user to set. + */ + u32 rx_max_pending; + u32 rx_mini_max_pending; + u32 rx_jumbo_max_pending; + u32 tx_max_pending; + + /* Values changeable by the user. The valid values are + * in the range 1 to the "*_max_pending" counterpart above. + */ + u32 rx_pending; + u32 rx_mini_pending; + u32 rx_jumbo_pending; + u32 tx_pending; +}; +#endif /* ETHTOOL_GRINGPARAM */ + +#ifndef ETHTOOL_SRINGPARAM +#define ETHTOOL_SRINGPARAM 0x00000011 /* Set ring parameters, priv. */ +#endif +#ifndef ETHTOOL_GPAUSEPARAM +#define ETHTOOL_GPAUSEPARAM 0x00000012 /* Get pause parameters */ +/* for configuring link flow control parameters */ +#define ethtool_pauseparam _kc_ethtool_pauseparam +struct _kc_ethtool_pauseparam { + u32 cmd; /* ETHTOOL_{G,S}PAUSEPARAM */ + + /* If the link is being auto-negotiated (via ethtool_cmd.autoneg + * being true) the user may set 'autoneg' here non-zero to have the + * pause parameters be auto-negotiated too. In such a case, the + * {rx,tx}_pause values below determine what capabilities are + * advertised. + * + * If 'autoneg' is zero or the link is not being auto-negotiated, + * then {rx,tx}_pause force the driver to use/not-use pause + * flow control. + */ + u32 autoneg; + u32 rx_pause; + u32 tx_pause; +}; +#endif /* ETHTOOL_GPAUSEPARAM */ + +#ifndef ETHTOOL_SPAUSEPARAM +#define ETHTOOL_SPAUSEPARAM 0x00000013 /* Set pause parameters. */ +#endif +#ifndef ETHTOOL_GRXCSUM +#define ETHTOOL_GRXCSUM 0x00000014 /* Get RX hw csum enable (ethtool_value) */ +#endif +#ifndef ETHTOOL_SRXCSUM +#define ETHTOOL_SRXCSUM 0x00000015 /* Set RX hw csum enable (ethtool_value) */ +#endif +#ifndef ETHTOOL_GTXCSUM +#define ETHTOOL_GTXCSUM 0x00000016 /* Get TX hw csum enable (ethtool_value) */ +#endif +#ifndef ETHTOOL_STXCSUM +#define ETHTOOL_STXCSUM 0x00000017 /* Set TX hw csum enable (ethtool_value) */ +#endif +#ifndef ETHTOOL_GSG +#define ETHTOOL_GSG 0x00000018 /* Get scatter-gather enable + * (ethtool_value) */ +#endif +#ifndef ETHTOOL_SSG +#define ETHTOOL_SSG 0x00000019 /* Set scatter-gather enable + * (ethtool_value). */ +#endif +#ifndef ETHTOOL_TEST +#define ETHTOOL_TEST 0x0000001a /* execute NIC self-test, priv. */ +#endif +#ifndef ETHTOOL_GSTRINGS +#define ETHTOOL_GSTRINGS 0x0000001b /* get specified string set */ +#endif +#ifndef ETHTOOL_PHYS_ID +#define ETHTOOL_PHYS_ID 0x0000001c /* identify the NIC */ +#endif +#ifndef ETHTOOL_GSTATS +#define ETHTOOL_GSTATS 0x0000001d /* get NIC-specific statistics */ +#endif +#ifndef ETHTOOL_GTSO +#define ETHTOOL_GTSO 0x0000001e /* Get TSO enable (ethtool_value) */ +#endif +#ifndef ETHTOOL_STSO +#define ETHTOOL_STSO 0x0000001f /* Set TSO enable (ethtool_value) */ +#endif + +#ifndef ETHTOOL_BUSINFO_LEN +#define ETHTOOL_BUSINFO_LEN 32 +#endif + +#ifndef RHEL_RELEASE_VERSION +#define RHEL_RELEASE_VERSION(a,b) (((a) << 8) + (b)) +#endif +#ifndef AX_RELEASE_VERSION +#define AX_RELEASE_VERSION(a,b) (((a) << 8) + (b)) +#endif + +#ifndef AX_RELEASE_CODE +#define AX_RELEASE_CODE 0 +#endif + +#if (AX_RELEASE_CODE && AX_RELEASE_CODE == AX_RELEASE_VERSION(3,0)) +#define RHEL_RELEASE_CODE RHEL_RELEASE_VERSION(5,0) +#elif (AX_RELEASE_CODE && AX_RELEASE_CODE == AX_RELEASE_VERSION(3,1)) +#define RHEL_RELEASE_CODE RHEL_RELEASE_VERSION(5,1) +#elif (AX_RELEASE_CODE && AX_RELEASE_CODE == AX_RELEASE_VERSION(3,2)) +#define RHEL_RELEASE_CODE RHEL_RELEASE_VERSION(5,3) +#endif + +#ifndef RHEL_RELEASE_CODE +/* NOTE: RHEL_RELEASE_* introduced in RHEL4.5 */ +#define RHEL_RELEASE_CODE 0 +#endif + +/* SuSE version macro is the same as Linux kernel version */ +#ifndef SLE_VERSION +#define SLE_VERSION(a,b,c) KERNEL_VERSION(a,b,c) +#endif +#ifdef CONFIG_SUSE_KERNEL +#if ( LINUX_VERSION_CODE == KERNEL_VERSION(2,6,27) ) +/* SLES11 GA is 2.6.27 based */ +#define SLE_VERSION_CODE SLE_VERSION(11,0,0) +#elif ( LINUX_VERSION_CODE == KERNEL_VERSION(2,6,32) ) +/* SLES11 SP1 is 2.6.32 based */ +#define SLE_VERSION_CODE SLE_VERSION(11,1,0) +#elif ((LINUX_VERSION_CODE >= KERNEL_VERSION(3,0,61)) && \ + (LINUX_VERSION_CODE < KERNEL_VERSION(3,1,0))) +/* SLES11 SP3 is at least 3.0.61+ based */ +#define SLE_VERSION_CODE SLE_VERSION(11,3,0) +#elif ( LINUX_VERSION_CODE >= KERNEL_VERSION(3,12,28) ) +/* SLES12 is at least 3.12.28+ based */ +#define SLE_VERSION_CODE SLE_VERSION(12,0,0) +#endif /* LINUX_VERSION_CODE == KERNEL_VERSION(x,y,z) */ +#endif /* CONFIG_SUSE_KERNEL */ +#ifndef SLE_VERSION_CODE +#define SLE_VERSION_CODE 0 +#endif /* SLE_VERSION_CODE */ + +/* Ubuntu release and kernel codes must be specified from Makefile */ +#ifndef UBUNTU_RELEASE_VERSION +#define UBUNTU_RELEASE_VERSION(a,b) (((a) * 100) + (b)) +#endif +#ifndef UBUNTU_KERNEL_VERSION +#define UBUNTU_KERNEL_VERSION(a,b,c,abi,upload) (((a) << 40) + ((b) << 32) + ((c) << 24) + ((abi) << 8) + (upload)) +#endif +#ifndef UBUNTU_RELEASE_CODE +#define UBUNTU_RELEASE_CODE 0 +#endif +#ifndef UBUNTU_KERNEL_CODE +#define UBUNTU_KERNEL_CODE 0 +#endif + +#ifdef __KLOCWORK__ +#ifdef ARRAY_SIZE +#undef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif +#endif /* __KLOCWORK__ */ + +/*****************************************************************************/ +/* 2.4.3 => 2.4.0 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,3) ) + +/**************************************/ +/* PCI DRIVER API */ + +#ifndef pci_set_dma_mask +#define pci_set_dma_mask _kc_pci_set_dma_mask +extern int _kc_pci_set_dma_mask(struct pci_dev *dev, dma_addr_t mask); +#endif + +#ifndef pci_request_regions +#define pci_request_regions _kc_pci_request_regions +extern int _kc_pci_request_regions(struct pci_dev *pdev, char *res_name); +#endif + +#ifndef pci_release_regions +#define pci_release_regions _kc_pci_release_regions +extern void _kc_pci_release_regions(struct pci_dev *pdev); +#endif + +/**************************************/ +/* NETWORK DRIVER API */ + +#ifndef alloc_etherdev +#define alloc_etherdev _kc_alloc_etherdev +extern struct net_device * _kc_alloc_etherdev(int sizeof_priv); +#endif + +#ifndef is_valid_ether_addr +#define is_valid_ether_addr _kc_is_valid_ether_addr +extern int _kc_is_valid_ether_addr(u8 *addr); +#endif + +/**************************************/ +/* MISCELLANEOUS */ + +#ifndef INIT_TQUEUE +#define INIT_TQUEUE(_tq, _routine, _data) \ + do { \ + INIT_LIST_HEAD(&(_tq)->list); \ + (_tq)->sync = 0; \ + (_tq)->routine = _routine; \ + (_tq)->data = _data; \ + } while (0) +#endif + +#endif /* 2.4.3 => 2.4.0 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,5) ) +/* Generic MII registers. */ +#define MII_BMCR 0x00 /* Basic mode control register */ +#define MII_BMSR 0x01 /* Basic mode status register */ +#define MII_PHYSID1 0x02 /* PHYS ID 1 */ +#define MII_PHYSID2 0x03 /* PHYS ID 2 */ +#define MII_ADVERTISE 0x04 /* Advertisement control reg */ +#define MII_LPA 0x05 /* Link partner ability reg */ +#define MII_EXPANSION 0x06 /* Expansion register */ +/* Basic mode control register. */ +#define BMCR_FULLDPLX 0x0100 /* Full duplex */ +#define BMCR_ANENABLE 0x1000 /* Enable auto negotiation */ +/* Basic mode status register. */ +#define BMSR_ERCAP 0x0001 /* Ext-reg capability */ +#define BMSR_ANEGCAPABLE 0x0008 /* Able to do auto-negotiation */ +#define BMSR_10HALF 0x0800 /* Can do 10mbps, half-duplex */ +#define BMSR_10FULL 0x1000 /* Can do 10mbps, full-duplex */ +#define BMSR_100HALF 0x2000 /* Can do 100mbps, half-duplex */ +#define BMSR_100FULL 0x4000 /* Can do 100mbps, full-duplex */ +/* Advertisement control register. */ +#define ADVERTISE_CSMA 0x0001 /* Only selector supported */ +#define ADVERTISE_10HALF 0x0020 /* Try for 10mbps half-duplex */ +#define ADVERTISE_10FULL 0x0040 /* Try for 10mbps full-duplex */ +#define ADVERTISE_100HALF 0x0080 /* Try for 100mbps half-duplex */ +#define ADVERTISE_100FULL 0x0100 /* Try for 100mbps full-duplex */ +#define ADVERTISE_ALL (ADVERTISE_10HALF | ADVERTISE_10FULL | \ + ADVERTISE_100HALF | ADVERTISE_100FULL) +/* Expansion register for auto-negotiation. */ +#define EXPANSION_ENABLENPAGE 0x0004 /* This enables npage words */ +#endif + +/*****************************************************************************/ +/* 2.4.6 => 2.4.3 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,6) ) + +#ifndef pci_set_power_state +#define pci_set_power_state _kc_pci_set_power_state +extern int _kc_pci_set_power_state(struct pci_dev *dev, int state); +#endif + +#ifndef pci_enable_wake +#define pci_enable_wake _kc_pci_enable_wake +extern int _kc_pci_enable_wake(struct pci_dev *pdev, u32 state, int enable); +#endif + +#ifndef pci_disable_device +#define pci_disable_device _kc_pci_disable_device +extern void _kc_pci_disable_device(struct pci_dev *pdev); +#endif + +/* PCI PM entry point syntax changed, so don't support suspend/resume */ +#undef CONFIG_PM + +#endif /* 2.4.6 => 2.4.3 */ + +#ifndef HAVE_PCI_SET_MWI +#define pci_set_mwi(X) pci_write_config_word(X, \ + PCI_COMMAND, adapter->hw.bus.pci_cmd_word | \ + PCI_COMMAND_INVALIDATE); +#define pci_clear_mwi(X) pci_write_config_word(X, \ + PCI_COMMAND, adapter->hw.bus.pci_cmd_word & \ + ~PCI_COMMAND_INVALIDATE); +#endif + +/*****************************************************************************/ +/* 2.4.10 => 2.4.9 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,10) ) + +/**************************************/ +/* MODULE API */ + +#ifndef MODULE_LICENSE + #define MODULE_LICENSE(X) +#endif + +/**************************************/ +/* OTHER */ + +#undef min +#define min(x,y) ({ \ + const typeof(x) _x = (x); \ + const typeof(y) _y = (y); \ + (void) (&_x == &_y); \ + _x < _y ? _x : _y; }) + +#undef max +#define max(x,y) ({ \ + const typeof(x) _x = (x); \ + const typeof(y) _y = (y); \ + (void) (&_x == &_y); \ + _x > _y ? _x : _y; }) + +#define min_t(type,x,y) ({ \ + type _x = (x); \ + type _y = (y); \ + _x < _y ? _x : _y; }) + +#define max_t(type,x,y) ({ \ + type _x = (x); \ + type _y = (y); \ + _x > _y ? _x : _y; }) + +#ifndef list_for_each_safe +#define list_for_each_safe(pos, n, head) \ + for (pos = (head)->next, n = pos->next; pos != (head); \ + pos = n, n = pos->next) +#endif + +#ifndef ____cacheline_aligned_in_smp +#ifdef CONFIG_SMP +#define ____cacheline_aligned_in_smp ____cacheline_aligned +#else +#define ____cacheline_aligned_in_smp +#endif /* CONFIG_SMP */ +#endif + +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,8) ) +extern int _kc_snprintf(char * buf, size_t size, const char *fmt, ...); +#define snprintf(buf, size, fmt, args...) _kc_snprintf(buf, size, fmt, ##args) +extern int _kc_vsnprintf(char *buf, size_t size, const char *fmt, va_list args); +#define vsnprintf(buf, size, fmt, args) _kc_vsnprintf(buf, size, fmt, args) +#else /* 2.4.8 => 2.4.9 */ +extern int snprintf(char * buf, size_t size, const char *fmt, ...); +extern int vsnprintf(char *buf, size_t size, const char *fmt, va_list args); +#endif +#endif /* 2.4.10 -> 2.4.6 */ + + +/*****************************************************************************/ +/* 2.4.12 => 2.4.10 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,12) ) +#ifndef HAVE_NETIF_MSG +#define HAVE_NETIF_MSG 1 +enum { + NETIF_MSG_DRV = 0x0001, + NETIF_MSG_PROBE = 0x0002, + NETIF_MSG_LINK = 0x0004, + NETIF_MSG_TIMER = 0x0008, + NETIF_MSG_IFDOWN = 0x0010, + NETIF_MSG_IFUP = 0x0020, + NETIF_MSG_RX_ERR = 0x0040, + NETIF_MSG_TX_ERR = 0x0080, + NETIF_MSG_TX_QUEUED = 0x0100, + NETIF_MSG_INTR = 0x0200, + NETIF_MSG_TX_DONE = 0x0400, + NETIF_MSG_RX_STATUS = 0x0800, + NETIF_MSG_PKTDATA = 0x1000, + NETIF_MSG_HW = 0x2000, + NETIF_MSG_WOL = 0x4000, +}; + +#define netif_msg_drv(p) ((p)->msg_enable & NETIF_MSG_DRV) +#define netif_msg_probe(p) ((p)->msg_enable & NETIF_MSG_PROBE) +#define netif_msg_link(p) ((p)->msg_enable & NETIF_MSG_LINK) +#define netif_msg_timer(p) ((p)->msg_enable & NETIF_MSG_TIMER) +#define netif_msg_ifdown(p) ((p)->msg_enable & NETIF_MSG_IFDOWN) +#define netif_msg_ifup(p) ((p)->msg_enable & NETIF_MSG_IFUP) +#define netif_msg_rx_err(p) ((p)->msg_enable & NETIF_MSG_RX_ERR) +#define netif_msg_tx_err(p) ((p)->msg_enable & NETIF_MSG_TX_ERR) +#define netif_msg_tx_queued(p) ((p)->msg_enable & NETIF_MSG_TX_QUEUED) +#define netif_msg_intr(p) ((p)->msg_enable & NETIF_MSG_INTR) +#define netif_msg_tx_done(p) ((p)->msg_enable & NETIF_MSG_TX_DONE) +#define netif_msg_rx_status(p) ((p)->msg_enable & NETIF_MSG_RX_STATUS) +#define netif_msg_pktdata(p) ((p)->msg_enable & NETIF_MSG_PKTDATA) +#endif /* !HAVE_NETIF_MSG */ +#endif /* 2.4.12 => 2.4.10 */ + +/*****************************************************************************/ +/* 2.4.13 => 2.4.12 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,13) ) + +/**************************************/ +/* PCI DMA MAPPING */ + +#ifndef virt_to_page + #define virt_to_page(v) (mem_map + (virt_to_phys(v) >> PAGE_SHIFT)) +#endif + +#ifndef pci_map_page +#define pci_map_page _kc_pci_map_page +extern u64 _kc_pci_map_page(struct pci_dev *dev, struct page *page, unsigned long offset, size_t size, int direction); +#endif + +#ifndef pci_unmap_page +#define pci_unmap_page _kc_pci_unmap_page +extern void _kc_pci_unmap_page(struct pci_dev *dev, u64 dma_addr, size_t size, int direction); +#endif + +/* pci_set_dma_mask takes dma_addr_t, which is only 32-bits prior to 2.4.13 */ + +#undef DMA_32BIT_MASK +#define DMA_32BIT_MASK 0xffffffff +#undef DMA_64BIT_MASK +#define DMA_64BIT_MASK 0xffffffff + +/**************************************/ +/* OTHER */ + +#ifndef cpu_relax +#define cpu_relax() rep_nop() +#endif + +struct vlan_ethhdr { + unsigned char h_dest[ETH_ALEN]; + unsigned char h_source[ETH_ALEN]; + unsigned short h_vlan_proto; + unsigned short h_vlan_TCI; + unsigned short h_vlan_encapsulated_proto; +}; +#endif /* 2.4.13 => 2.4.12 */ + +/*****************************************************************************/ +/* 2.4.17 => 2.4.12 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,17) ) + +#ifndef __devexit_p + #define __devexit_p(x) &(x) +#endif + +#else + /* For Kernel 3.8 these are not defined - so undefine all */ + #undef __devexit_p + #undef __devexit + #undef __devinit + #undef __devinitdata + #define __devexit_p(x) &(x) + #define __devexit + #define __devinit + #define __devinitdata + +#endif /* 2.4.17 => 2.4.13 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,18) ) +#define NETIF_MSG_HW 0x2000 +#define NETIF_MSG_WOL 0x4000 + +#ifndef netif_msg_hw +#define netif_msg_hw(p) ((p)->msg_enable & NETIF_MSG_HW) +#endif +#ifndef netif_msg_wol +#define netif_msg_wol(p) ((p)->msg_enable & NETIF_MSG_WOL) +#endif +#endif /* 2.4.18 */ + +/*****************************************************************************/ + +/*****************************************************************************/ +/* 2.4.20 => 2.4.19 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20) ) + +/* we won't support NAPI on less than 2.4.20 */ +#ifdef NAPI +#undef NAPI +#endif + +#endif /* 2.4.20 => 2.4.19 */ + +/*****************************************************************************/ +/* 2.4.22 => 2.4.17 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,22) ) +#define pci_name(x) ((x)->slot_name) + +#ifndef SUPPORTED_10000baseT_Full +#define SUPPORTED_10000baseT_Full (1 << 12) +#endif +#ifndef ADVERTISED_10000baseT_Full +#define ADVERTISED_10000baseT_Full (1 << 12) +#endif +#endif + +/*****************************************************************************/ +/* 2.4.22 => 2.4.17 */ + +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,22) ) +#ifndef IGB_NO_LRO +#define IGB_NO_LRO +#endif +#endif + +/*****************************************************************************/ +/*****************************************************************************/ +/* 2.4.23 => 2.4.22 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,23) ) +/*****************************************************************************/ +#ifdef NAPI +#ifndef netif_poll_disable +#define netif_poll_disable(x) _kc_netif_poll_disable(x) +static inline void _kc_netif_poll_disable(struct net_device *netdev) +{ + while (test_and_set_bit(__LINK_STATE_RX_SCHED, &netdev->state)) { + /* No hurry */ + current->state = TASK_INTERRUPTIBLE; + schedule_timeout(1); + } +} +#endif +#ifndef netif_poll_enable +#define netif_poll_enable(x) _kc_netif_poll_enable(x) +static inline void _kc_netif_poll_enable(struct net_device *netdev) +{ + clear_bit(__LINK_STATE_RX_SCHED, &netdev->state); +} +#endif +#endif /* NAPI */ +#ifndef netif_tx_disable +#define netif_tx_disable(x) _kc_netif_tx_disable(x) +static inline void _kc_netif_tx_disable(struct net_device *dev) +{ + spin_lock_bh(&dev->xmit_lock); + netif_stop_queue(dev); + spin_unlock_bh(&dev->xmit_lock); +} +#endif +#else /* 2.4.23 => 2.4.22 */ +#define HAVE_SCTP +#endif /* 2.4.23 => 2.4.22 */ + +/*****************************************************************************/ +/* 2.6.4 => 2.6.0 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,25) || \ + ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) && \ + LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) ) ) +#define ETHTOOL_OPS_COMPAT +#endif /* 2.6.4 => 2.6.0 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,27) ) +#define __user +#endif /* < 2.4.27 */ + +/*****************************************************************************/ +/* 2.5.71 => 2.4.x */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,5,71) ) +#define sk_protocol protocol +#define pci_get_device pci_find_device +#endif /* 2.5.70 => 2.4.x */ + +/*****************************************************************************/ +/* < 2.4.27 or 2.6.0 <= 2.6.5 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,27) || \ + ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) && \ + LINUX_VERSION_CODE < KERNEL_VERSION(2,6,5) ) ) + +#ifndef netif_msg_init +#define netif_msg_init _kc_netif_msg_init +static inline u32 _kc_netif_msg_init(int debug_value, int default_msg_enable_bits) +{ + /* use default */ + if (debug_value < 0 || debug_value >= (sizeof(u32) * 8)) + return default_msg_enable_bits; + if (debug_value == 0) /* no output */ + return 0; + /* set low N bits */ + return (1 << debug_value) -1; +} +#endif + +#endif /* < 2.4.27 or 2.6.0 <= 2.6.5 */ +/*****************************************************************************/ +#if (( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,27) ) || \ + (( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) ) && \ + ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,3) ))) +#define netdev_priv(x) x->priv +#endif + +/*****************************************************************************/ +/* <= 2.5.0 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) ) +#include +#undef pci_register_driver +#define pci_register_driver pci_module_init + +/* + * Most of the dma compat code is copied/modifed from the 2.4.37 + * /include/linux/libata-compat.h header file + */ +/* These definitions mirror those in pci.h, so they can be used + * interchangeably with their PCI_ counterparts */ +enum dma_data_direction { + DMA_BIDIRECTIONAL = 0, + DMA_TO_DEVICE = 1, + DMA_FROM_DEVICE = 2, + DMA_NONE = 3, +}; + +struct device { + struct pci_dev pdev; +}; + +static inline struct pci_dev *to_pci_dev (struct device *dev) +{ + return (struct pci_dev *) dev; +} +static inline struct device *pci_dev_to_dev(struct pci_dev *pdev) +{ + return (struct device *) pdev; +} + +#define pdev_printk(lvl, pdev, fmt, args...) \ + printk("%s %s: " fmt, lvl, pci_name(pdev), ## args) +#define dev_err(dev, fmt, args...) \ + pdev_printk(KERN_ERR, to_pci_dev(dev), fmt, ## args) +#define dev_info(dev, fmt, args...) \ + pdev_printk(KERN_INFO, to_pci_dev(dev), fmt, ## args) +#define dev_warn(dev, fmt, args...) \ + pdev_printk(KERN_WARNING, to_pci_dev(dev), fmt, ## args) +#define dev_notice(dev, fmt, args...) \ + pdev_printk(KERN_NOTICE, to_pci_dev(dev), fmt, ## args) +#define dev_dbg(dev, fmt, args...) \ + pdev_printk(KERN_DEBUG, to_pci_dev(dev), fmt, ## args) + +/* NOTE: dangerous! we ignore the 'gfp' argument */ +#define dma_alloc_coherent(dev,sz,dma,gfp) \ + pci_alloc_consistent(to_pci_dev(dev),(sz),(dma)) +#define dma_free_coherent(dev,sz,addr,dma_addr) \ + pci_free_consistent(to_pci_dev(dev),(sz),(addr),(dma_addr)) + +#define dma_map_page(dev,a,b,c,d) \ + pci_map_page(to_pci_dev(dev),(a),(b),(c),(d)) +#define dma_unmap_page(dev,a,b,c) \ + pci_unmap_page(to_pci_dev(dev),(a),(b),(c)) + +#define dma_map_single(dev,a,b,c) \ + pci_map_single(to_pci_dev(dev),(a),(b),(c)) +#define dma_unmap_single(dev,a,b,c) \ + pci_unmap_single(to_pci_dev(dev),(a),(b),(c)) + +#define dma_map_sg(dev, sg, nents, dir) \ + pci_map_sg(to_pci_dev(dev), (sg), (nents), (dir) +#define dma_unmap_sg(dev, sg, nents, dir) \ + pci_unmap_sg(to_pci_dev(dev), (sg), (nents), (dir) + +#define dma_sync_single(dev,a,b,c) \ + pci_dma_sync_single(to_pci_dev(dev),(a),(b),(c)) + +/* for range just sync everything, that's all the pci API can do */ +#define dma_sync_single_range(dev,addr,off,sz,dir) \ + pci_dma_sync_single(to_pci_dev(dev),(addr),(off)+(sz),(dir)) + +#define dma_set_mask(dev,mask) \ + pci_set_dma_mask(to_pci_dev(dev),(mask)) + +/* hlist_* code - double linked lists */ +struct hlist_head { + struct hlist_node *first; +}; + +struct hlist_node { + struct hlist_node *next, **pprev; +}; + +static inline void __hlist_del(struct hlist_node *n) +{ + struct hlist_node *next = n->next; + struct hlist_node **pprev = n->pprev; + *pprev = next; + if (next) + next->pprev = pprev; +} + +static inline void hlist_del(struct hlist_node *n) +{ + __hlist_del(n); + n->next = NULL; + n->pprev = NULL; +} + +static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) +{ + struct hlist_node *first = h->first; + n->next = first; + if (first) + first->pprev = &n->next; + h->first = n; + n->pprev = &h->first; +} + +static inline int hlist_empty(const struct hlist_head *h) +{ + return !h->first; +} +#define HLIST_HEAD_INIT { .first = NULL } +#define HLIST_HEAD(name) struct hlist_head name = { .first = NULL } +#define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL) +static inline void INIT_HLIST_NODE(struct hlist_node *h) +{ + h->next = NULL; + h->pprev = NULL; +} + +#ifndef might_sleep +#define might_sleep() +#endif +#else +static inline struct device *pci_dev_to_dev(struct pci_dev *pdev) +{ + return &pdev->dev; +} +#endif /* <= 2.5.0 */ + +/*****************************************************************************/ +/* 2.5.28 => 2.4.23 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,5,28) ) + +#include +#define work_struct tq_struct +#undef INIT_WORK +#define INIT_WORK(a,b) INIT_TQUEUE(a,(void (*)(void *))b,a) +#undef container_of +#define container_of list_entry +#define schedule_work schedule_task +#define flush_scheduled_work flush_scheduled_tasks +#define cancel_work_sync(x) flush_scheduled_work() + +#endif /* 2.5.28 => 2.4.17 */ + +/*****************************************************************************/ +/* 2.6.0 => 2.5.28 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) ) +#ifndef read_barrier_depends +#define read_barrier_depends() rmb() +#endif + +#undef get_cpu +#define get_cpu() smp_processor_id() +#undef put_cpu +#define put_cpu() do { } while(0) +#define MODULE_INFO(version, _version) +#ifndef CONFIG_E1000_DISABLE_PACKET_SPLIT +#define CONFIG_E1000_DISABLE_PACKET_SPLIT 1 +#endif +#ifndef CONFIG_IGB_DISABLE_PACKET_SPLIT +#define CONFIG_IGB_DISABLE_PACKET_SPLIT 1 +#endif + +#define dma_set_coherent_mask(dev,mask) 1 + +#undef dev_put +#define dev_put(dev) __dev_put(dev) + +#ifndef skb_fill_page_desc +#define skb_fill_page_desc _kc_skb_fill_page_desc +extern void _kc_skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, int off, int size); +#endif + +#undef ALIGN +#define ALIGN(x,a) (((x)+(a)-1)&~((a)-1)) + +#ifndef page_count +#define page_count(p) atomic_read(&(p)->count) +#endif + +#ifdef MAX_NUMNODES +#undef MAX_NUMNODES +#endif +#define MAX_NUMNODES 1 + +/* find_first_bit and find_next bit are not defined for most + * 2.4 kernels (except for the redhat 2.4.21 kernels + */ +#include +#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) +#undef find_next_bit +#define find_next_bit _kc_find_next_bit +extern unsigned long _kc_find_next_bit(const unsigned long *addr, + unsigned long size, + unsigned long offset); +#define find_first_bit(addr, size) find_next_bit((addr), (size), 0) + + +#ifndef netdev_name +static inline const char *_kc_netdev_name(const struct net_device *dev) +{ + if (strchr(dev->name, '%')) + return "(unregistered net_device)"; + return dev->name; +} +#define netdev_name(netdev) _kc_netdev_name(netdev) +#endif /* netdev_name */ + +#ifndef strlcpy +#define strlcpy _kc_strlcpy +extern size_t _kc_strlcpy(char *dest, const char *src, size_t size); +#endif /* strlcpy */ + +#ifndef do_div +#if BITS_PER_LONG == 64 +# define do_div(n,base) ({ \ + uint32_t __base = (base); \ + uint32_t __rem; \ + __rem = ((uint64_t)(n)) % __base; \ + (n) = ((uint64_t)(n)) / __base; \ + __rem; \ + }) +#elif BITS_PER_LONG == 32 +extern uint32_t _kc__div64_32(uint64_t *dividend, uint32_t divisor); +# define do_div(n,base) ({ \ + uint32_t __base = (base); \ + uint32_t __rem; \ + if (likely(((n) >> 32) == 0)) { \ + __rem = (uint32_t)(n) % __base; \ + (n) = (uint32_t)(n) / __base; \ + } else \ + __rem = _kc__div64_32(&(n), __base); \ + __rem; \ + }) +#else /* BITS_PER_LONG == ?? */ +# error do_div() does not yet support the C64 +#endif /* BITS_PER_LONG */ +#endif /* do_div */ + +#ifndef NSEC_PER_SEC +#define NSEC_PER_SEC 1000000000L +#endif + +#undef HAVE_I2C_SUPPORT +#else /* 2.6.0 */ +#if IS_ENABLED(CONFIG_I2C_ALGOBIT) && \ + (RHEL_RELEASE_CODE && (RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(4,9))) +#define HAVE_I2C_SUPPORT +#endif /* IS_ENABLED(CONFIG_I2C_ALGOBIT) */ + +#endif /* 2.6.0 => 2.5.28 */ +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,3) ) +#define dma_pool pci_pool +#define dma_pool_destroy pci_pool_destroy +#define dma_pool_alloc pci_pool_alloc +#define dma_pool_free pci_pool_free + +#define dma_pool_create(name,dev,size,align,allocation) \ + pci_pool_create((name),to_pci_dev(dev),(size),(align),(allocation)) +#endif /* < 2.6.3 */ + +/*****************************************************************************/ +/* 2.6.4 => 2.6.0 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) ) +#define MODULE_VERSION(_version) MODULE_INFO(version, _version) +#endif /* 2.6.4 => 2.6.0 */ + +/*****************************************************************************/ +/* 2.6.5 => 2.6.0 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,5) ) +#define dma_sync_single_for_cpu dma_sync_single +#define dma_sync_single_for_device dma_sync_single +#define dma_sync_single_range_for_cpu dma_sync_single_range +#define dma_sync_single_range_for_device dma_sync_single_range +#ifndef pci_dma_mapping_error +#define pci_dma_mapping_error _kc_pci_dma_mapping_error +static inline int _kc_pci_dma_mapping_error(dma_addr_t dma_addr) +{ + return dma_addr == 0; +} +#endif +#endif /* 2.6.5 => 2.6.0 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) ) +extern int _kc_scnprintf(char * buf, size_t size, const char *fmt, ...); +#define scnprintf(buf, size, fmt, args...) _kc_scnprintf(buf, size, fmt, ##args) +#endif /* < 2.6.4 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,6) ) +/* taken from 2.6 include/linux/bitmap.h */ +#undef bitmap_zero +#define bitmap_zero _kc_bitmap_zero +static inline void _kc_bitmap_zero(unsigned long *dst, int nbits) +{ + if (nbits <= BITS_PER_LONG) + *dst = 0UL; + else { + int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); + memset(dst, 0, len); + } +} +#define random_ether_addr _kc_random_ether_addr +static inline void _kc_random_ether_addr(u8 *addr) +{ + get_random_bytes(addr, ETH_ALEN); + addr[0] &= 0xfe; /* clear multicast */ + addr[0] |= 0x02; /* set local assignment */ +} +#define page_to_nid(x) 0 + +#endif /* < 2.6.6 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,7) ) +#undef if_mii +#define if_mii _kc_if_mii +static inline struct mii_ioctl_data *_kc_if_mii(struct ifreq *rq) +{ + return (struct mii_ioctl_data *) &rq->ifr_ifru; +} + +#ifndef __force +#define __force +#endif +#endif /* < 2.6.7 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,8) ) +#ifndef PCI_EXP_DEVCTL +#define PCI_EXP_DEVCTL 8 +#endif +#ifndef PCI_EXP_DEVCTL_CERE +#define PCI_EXP_DEVCTL_CERE 0x0001 +#endif +#define PCI_EXP_FLAGS 2 /* Capabilities register */ +#define PCI_EXP_FLAGS_VERS 0x000f /* Capability version */ +#define PCI_EXP_FLAGS_TYPE 0x00f0 /* Device/Port type */ +#define PCI_EXP_TYPE_ENDPOINT 0x0 /* Express Endpoint */ +#define PCI_EXP_TYPE_LEG_END 0x1 /* Legacy Endpoint */ +#define PCI_EXP_TYPE_ROOT_PORT 0x4 /* Root Port */ +#define PCI_EXP_TYPE_DOWNSTREAM 0x6 /* Downstream Port */ +#define PCI_EXP_FLAGS_SLOT 0x0100 /* Slot implemented */ +#define PCI_EXP_DEVCAP 4 /* Device capabilities */ +#define PCI_EXP_DEVSTA 10 /* Device Status */ +#define msleep(x) do { set_current_state(TASK_UNINTERRUPTIBLE); \ + schedule_timeout((x * HZ)/1000 + 2); \ + } while (0) + +#endif /* < 2.6.8 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,9)) +#include +#define __iomem + +#ifndef kcalloc +#define kcalloc(n, size, flags) _kc_kzalloc(((n) * (size)), flags) +extern void *_kc_kzalloc(size_t size, int flags); +#endif +#define MSEC_PER_SEC 1000L +static inline unsigned int _kc_jiffies_to_msecs(const unsigned long j) +{ +#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) + return (MSEC_PER_SEC / HZ) * j; +#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC) + return (j + (HZ / MSEC_PER_SEC) - 1)/(HZ / MSEC_PER_SEC); +#else + return (j * MSEC_PER_SEC) / HZ; +#endif +} +static inline unsigned long _kc_msecs_to_jiffies(const unsigned int m) +{ + if (m > _kc_jiffies_to_msecs(MAX_JIFFY_OFFSET)) + return MAX_JIFFY_OFFSET; +#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) + return (m + (MSEC_PER_SEC / HZ) - 1) / (MSEC_PER_SEC / HZ); +#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC) + return m * (HZ / MSEC_PER_SEC); +#else + return (m * HZ + MSEC_PER_SEC - 1) / MSEC_PER_SEC; +#endif +} + +#define msleep_interruptible _kc_msleep_interruptible +static inline unsigned long _kc_msleep_interruptible(unsigned int msecs) +{ + unsigned long timeout = _kc_msecs_to_jiffies(msecs) + 1; + + while (timeout && !signal_pending(current)) { + __set_current_state(TASK_INTERRUPTIBLE); + timeout = schedule_timeout(timeout); + } + return _kc_jiffies_to_msecs(timeout); +} + +/* Basic mode control register. */ +#define BMCR_SPEED1000 0x0040 /* MSB of Speed (1000) */ + +#ifndef __le16 +#define __le16 u16 +#endif +#ifndef __le32 +#define __le32 u32 +#endif +#ifndef __le64 +#define __le64 u64 +#endif +#ifndef __be16 +#define __be16 u16 +#endif +#ifndef __be32 +#define __be32 u32 +#endif +#ifndef __be64 +#define __be64 u64 +#endif + +static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb) +{ + return (struct vlan_ethhdr *)skb->mac.raw; +} + +/* Wake-On-Lan options. */ +#define WAKE_PHY (1 << 0) +#define WAKE_UCAST (1 << 1) +#define WAKE_MCAST (1 << 2) +#define WAKE_BCAST (1 << 3) +#define WAKE_ARP (1 << 4) +#define WAKE_MAGIC (1 << 5) +#define WAKE_MAGICSECURE (1 << 6) /* only meaningful if WAKE_MAGIC */ + +#define skb_header_pointer _kc_skb_header_pointer +static inline void *_kc_skb_header_pointer(const struct sk_buff *skb, + int offset, int len, void *buffer) +{ + int hlen = skb_headlen(skb); + + if (hlen - offset >= len) + return skb->data + offset; + +#ifdef MAX_SKB_FRAGS + if (skb_copy_bits(skb, offset, buffer, len) < 0) + return NULL; + + return buffer; +#else + return NULL; +#endif + +#ifndef NETDEV_TX_OK +#define NETDEV_TX_OK 0 +#endif +#ifndef NETDEV_TX_BUSY +#define NETDEV_TX_BUSY 1 +#endif +#ifndef NETDEV_TX_LOCKED +#define NETDEV_TX_LOCKED -1 +#endif +} + +#ifndef __bitwise +#define __bitwise +#endif +#endif /* < 2.6.9 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10) ) +#ifdef module_param_array_named +#undef module_param_array_named +#define module_param_array_named(name, array, type, nump, perm) \ + static struct kparam_array __param_arr_##name \ + = { ARRAY_SIZE(array), nump, param_set_##type, param_get_##type, \ + sizeof(array[0]), array }; \ + module_param_call(name, param_array_set, param_array_get, \ + &__param_arr_##name, perm) +#endif /* module_param_array_named */ +/* + * num_online is broken for all < 2.6.10 kernels. This is needed to support + * Node module parameter of ixgbe. + */ +#undef num_online_nodes +#define num_online_nodes(n) 1 +extern DECLARE_BITMAP(_kcompat_node_online_map, MAX_NUMNODES); +#undef node_online_map +#define node_online_map _kcompat_node_online_map +#define pci_get_class pci_find_class +#endif /* < 2.6.10 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11) ) +#define PCI_D0 0 +#define PCI_D1 1 +#define PCI_D2 2 +#define PCI_D3hot 3 +#define PCI_D3cold 4 +typedef int pci_power_t; +#define pci_choose_state(pdev,state) state +#define PMSG_SUSPEND 3 +#define PCI_EXP_LNKCTL 16 + +#undef NETIF_F_LLTX + +#ifndef ARCH_HAS_PREFETCH +#define prefetch(X) +#endif + +#ifndef NET_IP_ALIGN +#define NET_IP_ALIGN 2 +#endif + +#define KC_USEC_PER_SEC 1000000L +#define usecs_to_jiffies _kc_usecs_to_jiffies +static inline unsigned int _kc_jiffies_to_usecs(const unsigned long j) +{ +#if HZ <= KC_USEC_PER_SEC && !(KC_USEC_PER_SEC % HZ) + return (KC_USEC_PER_SEC / HZ) * j; +#elif HZ > KC_USEC_PER_SEC && !(HZ % KC_USEC_PER_SEC) + return (j + (HZ / KC_USEC_PER_SEC) - 1)/(HZ / KC_USEC_PER_SEC); +#else + return (j * KC_USEC_PER_SEC) / HZ; +#endif +} +static inline unsigned long _kc_usecs_to_jiffies(const unsigned int m) +{ + if (m > _kc_jiffies_to_usecs(MAX_JIFFY_OFFSET)) + return MAX_JIFFY_OFFSET; +#if HZ <= KC_USEC_PER_SEC && !(KC_USEC_PER_SEC % HZ) + return (m + (KC_USEC_PER_SEC / HZ) - 1) / (KC_USEC_PER_SEC / HZ); +#elif HZ > KC_USEC_PER_SEC && !(HZ % KC_USEC_PER_SEC) + return m * (HZ / KC_USEC_PER_SEC); +#else + return (m * HZ + KC_USEC_PER_SEC - 1) / KC_USEC_PER_SEC; +#endif +} + +#define PCI_EXP_LNKCAP 12 /* Link Capabilities */ +#define PCI_EXP_LNKSTA 18 /* Link Status */ +#define PCI_EXP_SLTCAP 20 /* Slot Capabilities */ +#define PCI_EXP_SLTCTL 24 /* Slot Control */ +#define PCI_EXP_SLTSTA 26 /* Slot Status */ +#define PCI_EXP_RTCTL 28 /* Root Control */ +#define PCI_EXP_RTCAP 30 /* Root Capabilities */ +#define PCI_EXP_RTSTA 32 /* Root Status */ +#endif /* < 2.6.11 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,12) ) +#include +#define USE_REBOOT_NOTIFIER + +/* Generic MII registers. */ +#define MII_CTRL1000 0x09 /* 1000BASE-T control */ +#define MII_STAT1000 0x0a /* 1000BASE-T status */ +/* Advertisement control register. */ +#define ADVERTISE_PAUSE_CAP 0x0400 /* Try for pause */ +#define ADVERTISE_PAUSE_ASYM 0x0800 /* Try for asymmetric pause */ +/* Link partner ability register. */ +#define LPA_PAUSE_CAP 0x0400 /* Can pause */ +#define LPA_PAUSE_ASYM 0x0800 /* Can pause asymetrically */ +/* 1000BASE-T Control register */ +#define ADVERTISE_1000FULL 0x0200 /* Advertise 1000BASE-T full duplex */ +#define ADVERTISE_1000HALF 0x0100 /* Advertise 1000BASE-T half duplex */ +/* 1000BASE-T Status register */ +#define LPA_1000LOCALRXOK 0x2000 /* Link partner local receiver status */ +#define LPA_1000REMRXOK 0x1000 /* Link partner remote receiver status */ + +#ifndef is_zero_ether_addr +#define is_zero_ether_addr _kc_is_zero_ether_addr +static inline int _kc_is_zero_ether_addr(const u8 *addr) +{ + return !(addr[0] | addr[1] | addr[2] | addr[3] | addr[4] | addr[5]); +} +#endif /* is_zero_ether_addr */ +#ifndef is_multicast_ether_addr +#define is_multicast_ether_addr _kc_is_multicast_ether_addr +static inline int _kc_is_multicast_ether_addr(const u8 *addr) +{ + return addr[0] & 0x01; +} +#endif /* is_multicast_ether_addr */ +#endif /* < 2.6.12 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,13) ) +#ifndef kstrdup +#define kstrdup _kc_kstrdup +extern char *_kc_kstrdup(const char *s, unsigned int gfp); +#endif +#endif /* < 2.6.13 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14) ) +#define pm_message_t u32 +#ifndef kzalloc +#define kzalloc _kc_kzalloc +extern void *_kc_kzalloc(size_t size, int flags); +#endif + +/* Generic MII registers. */ +#define MII_ESTATUS 0x0f /* Extended Status */ +/* Basic mode status register. */ +#define BMSR_ESTATEN 0x0100 /* Extended Status in R15 */ +/* Extended status register. */ +#define ESTATUS_1000_TFULL 0x2000 /* Can do 1000BT Full */ +#define ESTATUS_1000_THALF 0x1000 /* Can do 1000BT Half */ + +#define SUPPORTED_Pause (1 << 13) +#define SUPPORTED_Asym_Pause (1 << 14) +#define ADVERTISED_Pause (1 << 13) +#define ADVERTISED_Asym_Pause (1 << 14) + +#if (!(RHEL_RELEASE_CODE && \ + (RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(4,3)) && \ + (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0)))) +#if ((LINUX_VERSION_CODE == KERNEL_VERSION(2,6,9)) && !defined(gfp_t)) +#define gfp_t unsigned +#else +typedef unsigned gfp_t; +#endif +#endif /* !RHEL4.3->RHEL5.0 */ + +#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9) ) +#ifdef CONFIG_X86_64 +#define dma_sync_single_range_for_cpu(dev, addr, off, sz, dir) \ + dma_sync_single_for_cpu((dev), (addr), (off) + (sz), (dir)) +#define dma_sync_single_range_for_device(dev, addr, off, sz, dir) \ + dma_sync_single_for_device((dev), (addr), (off) + (sz), (dir)) +#endif +#endif +#endif /* < 2.6.14 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,15) ) +#ifndef vmalloc_node +#define vmalloc_node(a,b) vmalloc(a) +#endif /* vmalloc_node*/ + +#define setup_timer(_timer, _function, _data) \ +do { \ + (_timer)->function = _function; \ + (_timer)->data = _data; \ + init_timer(_timer); \ +} while (0) +#ifndef device_can_wakeup +#define device_can_wakeup(dev) (1) +#endif +#ifndef device_set_wakeup_enable +#define device_set_wakeup_enable(dev, val) do{}while(0) +#endif +#ifndef device_init_wakeup +#define device_init_wakeup(dev,val) do {} while (0) +#endif +static inline unsigned _kc_compare_ether_addr(const u8 *addr1, const u8 *addr2) +{ + const u16 *a = (const u16 *) addr1; + const u16 *b = (const u16 *) addr2; + + return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) != 0; +} +#undef compare_ether_addr +#define compare_ether_addr(addr1, addr2) _kc_compare_ether_addr(addr1, addr2) +#endif /* < 2.6.15 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16) ) +#undef DEFINE_MUTEX +#define DEFINE_MUTEX(x) DECLARE_MUTEX(x) +#define mutex_lock(x) down_interruptible(x) +#define mutex_unlock(x) up(x) + +#ifndef ____cacheline_internodealigned_in_smp +#ifdef CONFIG_SMP +#define ____cacheline_internodealigned_in_smp ____cacheline_aligned_in_smp +#else +#define ____cacheline_internodealigned_in_smp +#endif /* CONFIG_SMP */ +#endif /* ____cacheline_internodealigned_in_smp */ +#undef HAVE_PCI_ERS +#else /* 2.6.16 and above */ +#undef HAVE_PCI_ERS +#define HAVE_PCI_ERS +#if ( SLE_VERSION_CODE && SLE_VERSION_CODE == SLE_VERSION(10,4,0) ) +#ifdef device_can_wakeup +#undef device_can_wakeup +#endif /* device_can_wakeup */ +#define device_can_wakeup(dev) 1 +#endif /* SLE_VERSION(10,4,0) */ +#endif /* < 2.6.16 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) ) +#ifndef dev_notice +#define dev_notice(dev, fmt, args...) \ + dev_printk(KERN_NOTICE, dev, fmt, ## args) +#endif + +#ifndef first_online_node +#define first_online_node 0 +#endif +#ifndef NET_SKB_PAD +#define NET_SKB_PAD 16 +#endif +#endif /* < 2.6.17 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) ) + +#ifndef IRQ_HANDLED +#define irqreturn_t void +#define IRQ_HANDLED +#define IRQ_NONE +#endif + +#ifndef IRQF_PROBE_SHARED +#ifdef SA_PROBEIRQ +#define IRQF_PROBE_SHARED SA_PROBEIRQ +#else +#define IRQF_PROBE_SHARED 0 +#endif +#endif + +#ifndef IRQF_SHARED +#define IRQF_SHARED SA_SHIRQ +#endif + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif + +#ifndef FIELD_SIZEOF +#define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) +#endif + +#ifndef skb_is_gso +#ifdef NETIF_F_TSO +#define skb_is_gso _kc_skb_is_gso +static inline int _kc_skb_is_gso(const struct sk_buff *skb) +{ + return skb_shinfo(skb)->gso_size; +} +#else +#define skb_is_gso(a) 0 +#endif +#endif + +#ifndef resource_size_t +#define resource_size_t unsigned long +#endif + +#ifdef skb_pad +#undef skb_pad +#endif +#define skb_pad(x,y) _kc_skb_pad(x, y) +int _kc_skb_pad(struct sk_buff *skb, int pad); +#ifdef skb_padto +#undef skb_padto +#endif +#define skb_padto(x,y) _kc_skb_padto(x, y) +static inline int _kc_skb_padto(struct sk_buff *skb, unsigned int len) +{ + unsigned int size = skb->len; + if(likely(size >= len)) + return 0; + return _kc_skb_pad(skb, len - size); +} + +#ifndef DECLARE_PCI_UNMAP_ADDR +#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \ + dma_addr_t ADDR_NAME +#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) \ + u32 LEN_NAME +#define pci_unmap_addr(PTR, ADDR_NAME) \ + ((PTR)->ADDR_NAME) +#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \ + (((PTR)->ADDR_NAME) = (VAL)) +#define pci_unmap_len(PTR, LEN_NAME) \ + ((PTR)->LEN_NAME) +#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \ + (((PTR)->LEN_NAME) = (VAL)) +#endif /* DECLARE_PCI_UNMAP_ADDR */ +#endif /* < 2.6.18 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) ) + +#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,0))) +#define i_private u.generic_ip +#endif /* >= RHEL 5.0 */ + +#ifndef DIV_ROUND_UP +#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) +#endif +#ifndef __ALIGN_MASK +#define __ALIGN_MASK(x, mask) (((x) + (mask)) & ~(mask)) +#endif +#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) ) +#if (!((RHEL_RELEASE_CODE && \ + ((RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(4,4) && \ + RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0)) || \ + (RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,0)))))) +typedef irqreturn_t (*irq_handler_t)(int, void*, struct pt_regs *); +#endif +#if (RHEL_RELEASE_CODE && RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(6,0)) +#undef CONFIG_INET_LRO +#undef CONFIG_INET_LRO_MODULE +#ifdef IXGBE_FCOE +#undef CONFIG_FCOE +#undef CONFIG_FCOE_MODULE +#endif /* IXGBE_FCOE */ +#endif +typedef irqreturn_t (*new_handler_t)(int, void*); +static inline irqreturn_t _kc_request_irq(unsigned int irq, new_handler_t handler, unsigned long flags, const char *devname, void *dev_id) +#else /* 2.4.x */ +typedef void (*irq_handler_t)(int, void*, struct pt_regs *); +typedef void (*new_handler_t)(int, void*); +static inline int _kc_request_irq(unsigned int irq, new_handler_t handler, unsigned long flags, const char *devname, void *dev_id) +#endif /* >= 2.5.x */ +{ + irq_handler_t new_handler = (irq_handler_t) handler; + return request_irq(irq, new_handler, flags, devname, dev_id); +} + +#undef request_irq +#define request_irq(irq, handler, flags, devname, dev_id) _kc_request_irq((irq), (handler), (flags), (devname), (dev_id)) + +#define irq_handler_t new_handler_t +/* pci_restore_state and pci_save_state handles MSI/PCIE from 2.6.19 */ +#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,4))) +#define PCIE_CONFIG_SPACE_LEN 256 +#define PCI_CONFIG_SPACE_LEN 64 +#define PCIE_LINK_STATUS 0x12 +#define pci_config_space_ich8lan() do {} while(0) +#undef pci_save_state +extern int _kc_pci_save_state(struct pci_dev *); +#define pci_save_state(pdev) _kc_pci_save_state(pdev) +#undef pci_restore_state +extern void _kc_pci_restore_state(struct pci_dev *); +#define pci_restore_state(pdev) _kc_pci_restore_state(pdev) +#endif /* !(RHEL_RELEASE_CODE >= RHEL 5.4) */ + +#ifdef HAVE_PCI_ERS +#undef free_netdev +extern void _kc_free_netdev(struct net_device *); +#define free_netdev(netdev) _kc_free_netdev(netdev) +#endif +static inline int pci_enable_pcie_error_reporting(struct pci_dev *dev) +{ + return 0; +} +#define pci_disable_pcie_error_reporting(dev) do {} while (0) +#define pci_cleanup_aer_uncorrect_error_status(dev) do {} while (0) + +extern void *_kc_kmemdup(const void *src, size_t len, unsigned gfp); +#define kmemdup(src, len, gfp) _kc_kmemdup(src, len, gfp) +#ifndef bool +#define bool _Bool +#define true 1 +#define false 0 +#endif +#else /* 2.6.19 */ +#include +#include +#endif /* < 2.6.19 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) ) +#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,28) ) +#undef INIT_WORK +#define INIT_WORK(_work, _func) \ +do { \ + INIT_LIST_HEAD(&(_work)->entry); \ + (_work)->pending = 0; \ + (_work)->func = (void (*)(void *))_func; \ + (_work)->data = _work; \ + init_timer(&(_work)->timer); \ +} while (0) +#endif + +#ifndef PCI_VDEVICE +#define PCI_VDEVICE(ven, dev) \ + PCI_VENDOR_ID_##ven, (dev), \ + PCI_ANY_ID, PCI_ANY_ID, 0, 0 +#endif + +#ifndef PCI_VENDOR_ID_INTEL +#define PCI_VENDOR_ID_INTEL 0x8086 +#endif + +#ifndef round_jiffies +#define round_jiffies(x) x +#endif + +#define csum_offset csum + +#define HAVE_EARLY_VMALLOC_NODE +#define dev_to_node(dev) -1 +#undef set_dev_node +/* remove compiler warning with b=b, for unused variable */ +#define set_dev_node(a, b) do { (b) = (b); } while(0) + +#if (!(RHEL_RELEASE_CODE && \ + (((RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(4,7)) && \ + (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0))) || \ + (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,6)))) && \ + !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(10,2,0))) +typedef __u16 __bitwise __sum16; +typedef __u32 __bitwise __wsum; +#endif + +#if (!(RHEL_RELEASE_CODE && \ + (((RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(4,7)) && \ + (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0))) || \ + (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,4)))) && \ + !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(10,2,0))) +static inline __wsum csum_unfold(__sum16 n) +{ + return (__force __wsum)n; +} +#endif + +#else /* < 2.6.20 */ +#define HAVE_DEVICE_NUMA_NODE +#endif /* < 2.6.20 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) ) +#define to_net_dev(class) container_of(class, struct net_device, class_dev) +#define NETDEV_CLASS_DEV +#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,5))) +#define vlan_group_get_device(vg, id) (vg->vlan_devices[id]) +#define vlan_group_set_device(vg, id, dev) \ + do { \ + if (vg) vg->vlan_devices[id] = dev; \ + } while (0) +#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,5)) */ +#define pci_channel_offline(pdev) (pdev->error_state && \ + pdev->error_state != pci_channel_io_normal) +#define pci_request_selected_regions(pdev, bars, name) \ + pci_request_regions(pdev, name) +#define pci_release_selected_regions(pdev, bars) pci_release_regions(pdev); + +#ifndef __aligned +#define __aligned(x) __attribute__((aligned(x))) +#endif + +extern struct pci_dev *_kc_netdev_to_pdev(struct net_device *netdev); +#define netdev_to_dev(netdev) \ + pci_dev_to_dev(_kc_netdev_to_pdev(netdev)) +#else +static inline struct device *netdev_to_dev(struct net_device *netdev) +{ + return &netdev->dev; +} + +#endif /* < 2.6.21 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) ) +#define tcp_hdr(skb) (skb->h.th) +#define tcp_hdrlen(skb) (skb->h.th->doff << 2) +#define skb_transport_offset(skb) (skb->h.raw - skb->data) +#define skb_transport_header(skb) (skb->h.raw) +#define ipv6_hdr(skb) (skb->nh.ipv6h) +#define ip_hdr(skb) (skb->nh.iph) +#define skb_network_offset(skb) (skb->nh.raw - skb->data) +#define skb_network_header(skb) (skb->nh.raw) +#define skb_tail_pointer(skb) skb->tail +#define skb_reset_tail_pointer(skb) \ + do { \ + skb->tail = skb->data; \ + } while (0) +#define skb_set_tail_pointer(skb, offset) \ + do { \ + skb->tail = skb->data + offset; \ + } while (0) +#define skb_copy_to_linear_data(skb, from, len) \ + memcpy(skb->data, from, len) +#define skb_copy_to_linear_data_offset(skb, offset, from, len) \ + memcpy(skb->data + offset, from, len) +#define skb_network_header_len(skb) (skb->h.raw - skb->nh.raw) +#define pci_register_driver pci_module_init +#define skb_mac_header(skb) skb->mac.raw + +#ifdef NETIF_F_MULTI_QUEUE +#ifndef alloc_etherdev_mq +#define alloc_etherdev_mq(_a, _b) alloc_etherdev(_a) +#endif +#endif /* NETIF_F_MULTI_QUEUE */ + +#ifndef ETH_FCS_LEN +#define ETH_FCS_LEN 4 +#endif +#define cancel_work_sync(x) flush_scheduled_work() +#ifndef udp_hdr +#define udp_hdr _udp_hdr +static inline struct udphdr *_udp_hdr(const struct sk_buff *skb) +{ + return (struct udphdr *)skb_transport_header(skb); +} +#endif + +#ifdef cpu_to_be16 +#undef cpu_to_be16 +#endif +#define cpu_to_be16(x) __constant_htons(x) + +#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,1))) +enum { + DUMP_PREFIX_NONE, + DUMP_PREFIX_ADDRESS, + DUMP_PREFIX_OFFSET +}; +#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,1)) */ +#ifndef hex_asc +#define hex_asc(x) "0123456789abcdef"[x] +#endif +#include +extern void _kc_print_hex_dump(const char *level, const char *prefix_str, + int prefix_type, int rowsize, int groupsize, + const void *buf, size_t len, bool ascii); +#define print_hex_dump(lvl, s, t, r, g, b, l, a) \ + _kc_print_hex_dump(lvl, s, t, r, g, b, l, a) +#ifndef ADVERTISED_2500baseX_Full +#define ADVERTISED_2500baseX_Full (1 << 15) +#endif +#ifndef SUPPORTED_2500baseX_Full +#define SUPPORTED_2500baseX_Full (1 << 15) +#endif + +#ifdef HAVE_I2C_SUPPORT +#include +#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,5))) +struct i2c_board_info { + char driver_name[KOBJ_NAME_LEN]; + char type[I2C_NAME_SIZE]; + unsigned short flags; + unsigned short addr; + void *platform_data; +}; +#define I2C_BOARD_INFO(driver, dev_addr) .driver_name = (driver),\ + .addr = (dev_addr) +#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,5)) */ +#define i2c_new_device(adap, info) _kc_i2c_new_device(adap, info) +extern struct i2c_client * +_kc_i2c_new_device(struct i2c_adapter *adap, struct i2c_board_info const *info); +#endif /* HAVE_I2C_SUPPORT */ + +#else /* 2.6.22 */ +#define ETH_TYPE_TRANS_SETS_DEV +#define HAVE_NETDEV_STATS_IN_NETDEV +#endif /* < 2.6.22 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,22) ) +#undef SET_MODULE_OWNER +#define SET_MODULE_OWNER(dev) do { } while (0) +#endif /* > 2.6.22 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) ) +#define netif_subqueue_stopped(_a, _b) 0 +#ifndef PTR_ALIGN +#define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a))) +#endif + +#ifndef CONFIG_PM_SLEEP +#define CONFIG_PM_SLEEP CONFIG_PM +#endif + +#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,13) ) +#define HAVE_ETHTOOL_GET_PERM_ADDR +#endif /* 2.6.14 through 2.6.22 */ +#endif /* < 2.6.23 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) ) +#ifndef ETH_FLAG_LRO +#define ETH_FLAG_LRO NETIF_F_LRO +#endif + +/* if GRO is supported then the napi struct must already exist */ +#ifndef NETIF_F_GRO +/* NAPI API changes in 2.6.24 break everything */ +struct napi_struct { + /* used to look up the real NAPI polling routine */ + int (*poll)(struct napi_struct *, int); + struct net_device *dev; + int weight; +}; +#endif + +#ifdef NAPI +extern int __kc_adapter_clean(struct net_device *, int *); +extern struct net_device *napi_to_poll_dev(const struct napi_struct *napi); +#define netif_napi_add(_netdev, _napi, _poll, _weight) \ + do { \ + struct napi_struct *__napi = (_napi); \ + struct net_device *poll_dev = napi_to_poll_dev(__napi); \ + poll_dev->poll = &(__kc_adapter_clean); \ + poll_dev->priv = (_napi); \ + poll_dev->weight = (_weight); \ + set_bit(__LINK_STATE_RX_SCHED, &poll_dev->state); \ + set_bit(__LINK_STATE_START, &poll_dev->state);\ + dev_hold(poll_dev); \ + __napi->poll = &(_poll); \ + __napi->weight = (_weight); \ + __napi->dev = (_netdev); \ + } while (0) +#define netif_napi_del(_napi) \ + do { \ + struct net_device *poll_dev = napi_to_poll_dev(_napi); \ + WARN_ON(!test_bit(__LINK_STATE_RX_SCHED, &poll_dev->state)); \ + dev_put(poll_dev); \ + memset(poll_dev, 0, sizeof(struct net_device));\ + } while (0) +#define napi_schedule_prep(_napi) \ + (netif_running((_napi)->dev) && netif_rx_schedule_prep(napi_to_poll_dev(_napi))) +#define napi_schedule(_napi) \ + do { \ + if (napi_schedule_prep(_napi)) \ + __netif_rx_schedule(napi_to_poll_dev(_napi)); \ + } while (0) +#define napi_enable(_napi) netif_poll_enable(napi_to_poll_dev(_napi)) +#define napi_disable(_napi) netif_poll_disable(napi_to_poll_dev(_napi)) +#ifdef CONFIG_SMP +static inline void napi_synchronize(const struct napi_struct *n) +{ + struct net_device *dev = napi_to_poll_dev(n); + + while (test_bit(__LINK_STATE_RX_SCHED, &dev->state)) { + /* No hurry. */ + msleep(1); + } +} +#else +#define napi_synchronize(n) barrier() +#endif /* CONFIG_SMP */ +#define __napi_schedule(_napi) __netif_rx_schedule(napi_to_poll_dev(_napi)) +#ifndef NETIF_F_GRO +#define napi_complete(_napi) netif_rx_complete(napi_to_poll_dev(_napi)) +#else +#define napi_complete(_napi) \ + do { \ + napi_gro_flush(_napi); \ + netif_rx_complete(napi_to_poll_dev(_napi)); \ + } while (0) +#endif /* NETIF_F_GRO */ +#else /* NAPI */ +#define netif_napi_add(_netdev, _napi, _poll, _weight) \ + do { \ + struct napi_struct *__napi = _napi; \ + _netdev->poll = &(_poll); \ + _netdev->weight = (_weight); \ + __napi->poll = &(_poll); \ + __napi->weight = (_weight); \ + __napi->dev = (_netdev); \ + } while (0) +#define netif_napi_del(_a) do {} while (0) +#endif /* NAPI */ + +#undef dev_get_by_name +#define dev_get_by_name(_a, _b) dev_get_by_name(_b) +#define __netif_subqueue_stopped(_a, _b) netif_subqueue_stopped(_a, _b) +#ifndef DMA_BIT_MASK +#define DMA_BIT_MASK(n) (((n) == 64) ? DMA_64BIT_MASK : ((1ULL<<(n))-1)) +#endif + +#ifdef NETIF_F_TSO6 +#define skb_is_gso_v6 _kc_skb_is_gso_v6 +static inline int _kc_skb_is_gso_v6(const struct sk_buff *skb) +{ + return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6; +} +#endif /* NETIF_F_TSO6 */ + +#ifndef KERN_CONT +#define KERN_CONT "" +#endif +#ifndef pr_err +#define pr_err(fmt, arg...) \ + printk(KERN_ERR fmt, ##arg) +#endif +#else /* < 2.6.24 */ +#define HAVE_ETHTOOL_GET_SSET_COUNT +#define HAVE_NETDEV_NAPI_LIST +#endif /* < 2.6.24 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,24) ) +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,2,0) ) +#include +#else /* >= 3.2.0 */ +#include +#endif /* else >= 3.2.0 */ +#endif /* > 2.6.24 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) ) +#define PM_QOS_CPU_DMA_LATENCY 1 + +#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) ) +#include +#define PM_QOS_DEFAULT_VALUE INFINITE_LATENCY +#define pm_qos_add_requirement(pm_qos_class, name, value) \ + set_acceptable_latency(name, value) +#define pm_qos_remove_requirement(pm_qos_class, name) \ + remove_acceptable_latency(name) +#define pm_qos_update_requirement(pm_qos_class, name, value) \ + modify_acceptable_latency(name, value) +#else +#define PM_QOS_DEFAULT_VALUE -1 +#define pm_qos_add_requirement(pm_qos_class, name, value) +#define pm_qos_remove_requirement(pm_qos_class, name) +#define pm_qos_update_requirement(pm_qos_class, name, value) { \ + if (value != PM_QOS_DEFAULT_VALUE) { \ + printk(KERN_WARNING "%s: unable to set PM QoS requirement\n", \ + pci_name(adapter->pdev)); \ + } \ +} + +#endif /* > 2.6.18 */ + +#define pci_enable_device_mem(pdev) pci_enable_device(pdev) + +#ifndef DEFINE_PCI_DEVICE_TABLE +#define DEFINE_PCI_DEVICE_TABLE(_table) struct pci_device_id _table[] +#endif /* DEFINE_PCI_DEVICE_TABLE */ + + +#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) ) +#ifndef IGB_PROCFS +#define IGB_PROCFS +#endif /* IGB_PROCFS */ +#endif /* >= 2.6.0 */ + +#else /* < 2.6.25 */ + + +#if IS_ENABLED(CONFIG_HWMON) +#ifndef IGB_HWMON +#define IGB_HWMON +#endif /* IGB_HWMON */ +#endif /* CONFIG_HWMON */ + +#endif /* < 2.6.25 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) ) +#ifndef clamp_t +#define clamp_t(type, val, min, max) ({ \ + type __val = (val); \ + type __min = (min); \ + type __max = (max); \ + __val = __val < __min ? __min : __val; \ + __val > __max ? __max : __val; }) +#endif /* clamp_t */ +#undef kzalloc_node +#define kzalloc_node(_size, _flags, _node) kzalloc(_size, _flags) + +extern void _kc_pci_disable_link_state(struct pci_dev *dev, int state); +#define pci_disable_link_state(p, s) _kc_pci_disable_link_state(p, s) +#else /* < 2.6.26 */ +#include +#define HAVE_NETDEV_VLAN_FEATURES +#ifndef PCI_EXP_LNKCAP_ASPMS +#define PCI_EXP_LNKCAP_ASPMS 0x00000c00 /* ASPM Support */ +#endif /* PCI_EXP_LNKCAP_ASPMS */ +#endif /* < 2.6.26 */ +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) ) +static inline void _kc_ethtool_cmd_speed_set(struct ethtool_cmd *ep, + __u32 speed) +{ + ep->speed = (__u16)speed; + /* ep->speed_hi = (__u16)(speed >> 16); */ +} +#define ethtool_cmd_speed_set _kc_ethtool_cmd_speed_set + +static inline __u32 _kc_ethtool_cmd_speed(struct ethtool_cmd *ep) +{ + /* no speed_hi before 2.6.27, and probably no need for it yet */ + return (__u32)ep->speed; +} +#define ethtool_cmd_speed _kc_ethtool_cmd_speed + +#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,15) ) +#if ((LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)) && defined(CONFIG_PM)) +#define ANCIENT_PM 1 +#elif ((LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)) && \ + (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26)) && \ + defined(CONFIG_PM_SLEEP)) +#define NEWER_PM 1 +#endif +#if defined(ANCIENT_PM) || defined(NEWER_PM) +#undef device_set_wakeup_enable +#define device_set_wakeup_enable(dev, val) \ + do { \ + u16 pmc = 0; \ + int pm = pci_find_capability(adapter->pdev, PCI_CAP_ID_PM); \ + if (pm) { \ + pci_read_config_word(adapter->pdev, pm + PCI_PM_PMC, \ + &pmc); \ + } \ + (dev)->power.can_wakeup = !!(pmc >> 11); \ + (dev)->power.should_wakeup = (val && (pmc >> 11)); \ + } while (0) +#endif /* 2.6.15-2.6.22 and CONFIG_PM or 2.6.23-2.6.25 and CONFIG_PM_SLEEP */ +#endif /* 2.6.15 through 2.6.27 */ +#ifndef netif_napi_del +#define netif_napi_del(_a) do {} while (0) +#ifdef NAPI +#ifdef CONFIG_NETPOLL +#undef netif_napi_del +#define netif_napi_del(_a) list_del(&(_a)->dev_list); +#endif +#endif +#endif /* netif_napi_del */ +#ifdef dma_mapping_error +#undef dma_mapping_error +#endif +#define dma_mapping_error(dev, dma_addr) pci_dma_mapping_error(dma_addr) + +#ifdef CONFIG_NETDEVICES_MULTIQUEUE +#define HAVE_TX_MQ +#endif + +#ifdef HAVE_TX_MQ +extern void _kc_netif_tx_stop_all_queues(struct net_device *); +extern void _kc_netif_tx_wake_all_queues(struct net_device *); +extern void _kc_netif_tx_start_all_queues(struct net_device *); +#define netif_tx_stop_all_queues(a) _kc_netif_tx_stop_all_queues(a) +#define netif_tx_wake_all_queues(a) _kc_netif_tx_wake_all_queues(a) +#define netif_tx_start_all_queues(a) _kc_netif_tx_start_all_queues(a) +#undef netif_stop_subqueue +#define netif_stop_subqueue(_ndev,_qi) do { \ + if (netif_is_multiqueue((_ndev))) \ + netif_stop_subqueue((_ndev), (_qi)); \ + else \ + netif_stop_queue((_ndev)); \ + } while (0) +#undef netif_start_subqueue +#define netif_start_subqueue(_ndev,_qi) do { \ + if (netif_is_multiqueue((_ndev))) \ + netif_start_subqueue((_ndev), (_qi)); \ + else \ + netif_start_queue((_ndev)); \ + } while (0) +#else /* HAVE_TX_MQ */ +#define netif_tx_stop_all_queues(a) netif_stop_queue(a) +#define netif_tx_wake_all_queues(a) netif_wake_queue(a) +#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,12) ) +#define netif_tx_start_all_queues(a) netif_start_queue(a) +#else +#define netif_tx_start_all_queues(a) do {} while (0) +#endif +#define netif_stop_subqueue(_ndev,_qi) netif_stop_queue((_ndev)) +#define netif_start_subqueue(_ndev,_qi) netif_start_queue((_ndev)) +#endif /* HAVE_TX_MQ */ +#ifndef NETIF_F_MULTI_QUEUE +#define NETIF_F_MULTI_QUEUE 0 +#define netif_is_multiqueue(a) 0 +#define netif_wake_subqueue(a, b) +#endif /* NETIF_F_MULTI_QUEUE */ + +#ifndef __WARN_printf +extern void __kc_warn_slowpath(const char *file, const int line, + const char *fmt, ...) __attribute__((format(printf, 3, 4))); +#define __WARN_printf(arg...) __kc_warn_slowpath(__FILE__, __LINE__, arg) +#endif /* __WARN_printf */ + +#ifndef WARN +#define WARN(condition, format...) ({ \ + int __ret_warn_on = !!(condition); \ + if (unlikely(__ret_warn_on)) \ + __WARN_printf(format); \ + unlikely(__ret_warn_on); \ +}) +#endif /* WARN */ +#undef HAVE_IXGBE_DEBUG_FS +#undef HAVE_IGB_DEBUG_FS +#else /* < 2.6.27 */ +#define HAVE_TX_MQ +#define HAVE_NETDEV_SELECT_QUEUE +#ifdef CONFIG_DEBUG_FS +#define HAVE_IXGBE_DEBUG_FS +#define HAVE_IGB_DEBUG_FS +#endif /* CONFIG_DEBUG_FS */ +#endif /* < 2.6.27 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) ) +#define pci_ioremap_bar(pdev, bar) ioremap(pci_resource_start(pdev, bar), \ + pci_resource_len(pdev, bar)) +#define pci_wake_from_d3 _kc_pci_wake_from_d3 +#define pci_prepare_to_sleep _kc_pci_prepare_to_sleep +extern int _kc_pci_wake_from_d3(struct pci_dev *dev, bool enable); +extern int _kc_pci_prepare_to_sleep(struct pci_dev *dev); +#define netdev_alloc_page(a) alloc_page(GFP_ATOMIC) +#ifndef __skb_queue_head_init +static inline void __kc_skb_queue_head_init(struct sk_buff_head *list) +{ + list->prev = list->next = (struct sk_buff *)list; + list->qlen = 0; +} +#define __skb_queue_head_init(_q) __kc_skb_queue_head_init(_q) +#endif + +#define PCI_EXP_DEVCAP2 36 /* Device Capabilities 2 */ +#define PCI_EXP_DEVCTL2 40 /* Device Control 2 */ + +#endif /* < 2.6.28 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,29) ) +#ifndef swap +#define swap(a, b) \ + do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) +#endif +#define pci_request_selected_regions_exclusive(pdev, bars, name) \ + pci_request_selected_regions(pdev, bars, name) +#ifndef CONFIG_NR_CPUS +#define CONFIG_NR_CPUS 1 +#endif /* CONFIG_NR_CPUS */ +#ifndef pcie_aspm_enabled +#define pcie_aspm_enabled() (1) +#endif /* pcie_aspm_enabled */ + +#define PCI_EXP_SLTSTA_PDS 0x0040 /* Presence Detect State */ + +#ifndef pci_clear_master +extern void _kc_pci_clear_master(struct pci_dev *dev); +#define pci_clear_master(dev) _kc_pci_clear_master(dev) +#endif + +#ifndef PCI_EXP_LNKCTL_ASPMC +#define PCI_EXP_LNKCTL_ASPMC 0x0003 /* ASPM Control */ +#endif +#else /* < 2.6.29 */ +#ifndef HAVE_NET_DEVICE_OPS +#define HAVE_NET_DEVICE_OPS +#endif +#ifdef CONFIG_DCB +#define HAVE_PFC_MODE_ENABLE +#endif /* CONFIG_DCB */ +#endif /* < 2.6.29 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30) ) +#define skb_rx_queue_recorded(a) false +#define skb_get_rx_queue(a) 0 +#define skb_record_rx_queue(a, b) do {} while (0) +#define skb_tx_hash(n, s) ___kc_skb_tx_hash((n), (s), (n)->real_num_tx_queues) +#ifndef CONFIG_PCI_IOV +#undef pci_enable_sriov +#define pci_enable_sriov(a, b) -ENOTSUPP +#undef pci_disable_sriov +#define pci_disable_sriov(a) do {} while (0) +#endif /* CONFIG_PCI_IOV */ +#ifndef pr_cont +#define pr_cont(fmt, ...) \ + printk(KERN_CONT fmt, ##__VA_ARGS__) +#endif /* pr_cont */ +static inline void _kc_synchronize_irq(unsigned int a) +{ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,5,28) ) + synchronize_irq(); +#else /* < 2.5.28 */ + synchronize_irq(a); +#endif /* < 2.5.28 */ +} +#undef synchronize_irq +#define synchronize_irq(a) _kc_synchronize_irq(a) + +#define PCI_EXP_LNKCTL2 48 /* Link Control 2 */ + +#else /* < 2.6.30 */ +#define HAVE_ASPM_QUIRKS +#endif /* < 2.6.30 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,31) ) +#define ETH_P_1588 0x88F7 +#define ETH_P_FIP 0x8914 +#ifndef netdev_uc_count +#define netdev_uc_count(dev) ((dev)->uc_count) +#endif +#ifndef netdev_for_each_uc_addr +#define netdev_for_each_uc_addr(uclist, dev) \ + for (uclist = dev->uc_list; uclist; uclist = uclist->next) +#endif +#ifndef PORT_OTHER +#define PORT_OTHER 0xff +#endif +#ifndef MDIO_PHY_ID_PRTAD +#define MDIO_PHY_ID_PRTAD 0x03e0 +#endif +#ifndef MDIO_PHY_ID_DEVAD +#define MDIO_PHY_ID_DEVAD 0x001f +#endif +#ifndef skb_dst +#define skb_dst(s) ((s)->dst) +#endif + +#ifndef SUPPORTED_1000baseKX_Full +#define SUPPORTED_1000baseKX_Full (1 << 17) +#endif +#ifndef SUPPORTED_10000baseKX4_Full +#define SUPPORTED_10000baseKX4_Full (1 << 18) +#endif +#ifndef SUPPORTED_10000baseKR_Full +#define SUPPORTED_10000baseKR_Full (1 << 19) +#endif + +#ifndef ADVERTISED_1000baseKX_Full +#define ADVERTISED_1000baseKX_Full (1 << 17) +#endif +#ifndef ADVERTISED_10000baseKX4_Full +#define ADVERTISED_10000baseKX4_Full (1 << 18) +#endif +#ifndef ADVERTISED_10000baseKR_Full +#define ADVERTISED_10000baseKR_Full (1 << 19) +#endif + +#else /* < 2.6.31 */ +#ifndef HAVE_NETDEV_STORAGE_ADDRESS +#define HAVE_NETDEV_STORAGE_ADDRESS +#endif +#ifndef HAVE_NETDEV_HW_ADDR +#define HAVE_NETDEV_HW_ADDR +#endif +#ifndef HAVE_TRANS_START_IN_QUEUE +#define HAVE_TRANS_START_IN_QUEUE +#endif +#ifndef HAVE_INCLUDE_LINUX_MDIO_H +#define HAVE_INCLUDE_LINUX_MDIO_H +#endif +#endif /* < 2.6.31 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,32) ) +#undef netdev_tx_t +#define netdev_tx_t int +#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) +#ifndef NETIF_F_FCOE_MTU +#define NETIF_F_FCOE_MTU (1 << 26) +#endif +#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */ + +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) ) +static inline int _kc_pm_runtime_get_sync() +{ + return 1; +} +#define pm_runtime_get_sync(dev) _kc_pm_runtime_get_sync() +#else /* 2.6.0 => 2.6.32 */ +static inline int _kc_pm_runtime_get_sync(struct device *dev) +{ + return 1; +} +#ifndef pm_runtime_get_sync +#define pm_runtime_get_sync(dev) _kc_pm_runtime_get_sync(dev) +#endif +#endif /* 2.6.0 => 2.6.32 */ +#ifndef pm_runtime_put +#define pm_runtime_put(dev) do {} while (0) +#endif +#ifndef pm_runtime_put_sync +#define pm_runtime_put_sync(dev) do {} while (0) +#endif +#ifndef pm_runtime_resume +#define pm_runtime_resume(dev) do {} while (0) +#endif +#ifndef pm_schedule_suspend +#define pm_schedule_suspend(dev, t) do {} while (0) +#endif +#ifndef pm_runtime_set_suspended +#define pm_runtime_set_suspended(dev) do {} while (0) +#endif +#ifndef pm_runtime_disable +#define pm_runtime_disable(dev) do {} while (0) +#endif +#ifndef pm_runtime_put_noidle +#define pm_runtime_put_noidle(dev) do {} while (0) +#endif +#ifndef pm_runtime_set_active +#define pm_runtime_set_active(dev) do {} while (0) +#endif +#ifndef pm_runtime_enable +#define pm_runtime_enable(dev) do {} while (0) +#endif +#ifndef pm_runtime_get_noresume +#define pm_runtime_get_noresume(dev) do {} while (0) +#endif +#else /* < 2.6.32 */ +#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) +#ifndef HAVE_NETDEV_OPS_FCOE_ENABLE +#define HAVE_NETDEV_OPS_FCOE_ENABLE +#endif +#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */ +#ifdef CONFIG_DCB +#ifndef HAVE_DCBNL_OPS_GETAPP +#define HAVE_DCBNL_OPS_GETAPP +#endif +#endif /* CONFIG_DCB */ +#include +/* IOV bad DMA target work arounds require at least this kernel rev support */ +#define HAVE_PCIE_TYPE +#endif /* < 2.6.32 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,33) ) +#ifndef pci_pcie_cap +#define pci_pcie_cap(pdev) pci_find_capability(pdev, PCI_CAP_ID_EXP) +#endif +#ifndef IPV4_FLOW +#define IPV4_FLOW 0x10 +#endif /* IPV4_FLOW */ +#ifndef IPV6_FLOW +#define IPV6_FLOW 0x11 +#endif /* IPV6_FLOW */ +/* Features back-ported to RHEL6 or SLES11 SP1 after 2.6.32 */ +#if ( (RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0)) || \ + (SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,1,0)) ) +#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) +#ifndef HAVE_NETDEV_OPS_FCOE_GETWWN +#define HAVE_NETDEV_OPS_FCOE_GETWWN +#endif +#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */ +#endif /* RHEL6 or SLES11 SP1 */ +#ifndef __percpu +#define __percpu +#endif /* __percpu */ +#ifndef PORT_DA +#define PORT_DA PORT_OTHER +#endif +#ifndef PORT_NONE +#define PORT_NONE PORT_OTHER +#endif + +#if ((RHEL_RELEASE_CODE && \ + (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,3)) && \ + (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(7,0)))) +#if !defined(CONFIG_X86_32) && !defined(CONFIG_NEED_DMA_MAP_STATE) +#undef DEFINE_DMA_UNMAP_ADDR +#define DEFINE_DMA_UNMAP_ADDR(ADDR_NAME) dma_addr_t ADDR_NAME +#undef DEFINE_DMA_UNMAP_LEN +#define DEFINE_DMA_UNMAP_LEN(LEN_NAME) __u32 LEN_NAME +#undef dma_unmap_addr +#define dma_unmap_addr(PTR, ADDR_NAME) ((PTR)->ADDR_NAME) +#undef dma_unmap_addr_set +#define dma_unmap_addr_set(PTR, ADDR_NAME, VAL) (((PTR)->ADDR_NAME) = (VAL)) +#undef dma_unmap_len +#define dma_unmap_len(PTR, LEN_NAME) ((PTR)->LEN_NAME) +#undef dma_unmap_len_set +#define dma_unmap_len_set(PTR, LEN_NAME, VAL) (((PTR)->LEN_NAME) = (VAL)) +#endif /* CONFIG_X86_64 && !CONFIG_NEED_DMA_MAP_STATE */ +#endif /* RHEL_RELEASE_CODE */ + +#if (!(RHEL_RELEASE_CODE && \ + (((RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,8)) && \ + (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(6,0))) || \ + ((RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,1)) && \ + (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(7,0)))))) +static inline bool pci_is_pcie(struct pci_dev *dev) +{ + return !!pci_pcie_cap(dev); +} +#endif /* RHEL_RELEASE_CODE */ + +#ifndef __always_unused +#define __always_unused __attribute__((__unused__)) +#endif +#ifndef __maybe_unused +#define __maybe_unused __attribute__((__unused__)) +#endif + +#if (!(RHEL_RELEASE_CODE && \ + (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,2)))) +#define sk_tx_queue_get(_sk) (-1) +#define sk_tx_queue_set(_sk, _tx_queue) do {} while(0) +#endif /* !(RHEL >= 6.2) */ + +#if (RHEL_RELEASE_CODE && \ + (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,4)) && \ + (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(7,0))) +#define HAVE_RHEL6_ETHTOOL_OPS_EXT_STRUCT +#define HAVE_ETHTOOL_SET_PHYS_ID +#define HAVE_ETHTOOL_GET_TS_INFO +#endif /* RHEL >= 6.4 && RHEL < 7.0 */ + +#if (RHEL_RELEASE_CODE && \ + (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,5)) && \ + (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(7,0))) +#define HAVE_RHEL6_NETDEV_OPS_EXT_FDB +#endif /* RHEL >= 6.5 && RHEL < 7.0 */ + +#else /* < 2.6.33 */ +#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) +#ifndef HAVE_NETDEV_OPS_FCOE_GETWWN +#define HAVE_NETDEV_OPS_FCOE_GETWWN +#endif +#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */ +#endif /* < 2.6.33 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,34) ) +#if (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(6,0)) +#ifndef pci_num_vf +#define pci_num_vf(pdev) _kc_pci_num_vf(pdev) +extern int _kc_pci_num_vf(struct pci_dev *dev); +#endif +#endif /* RHEL_RELEASE_CODE */ + +#ifndef ETH_FLAG_NTUPLE +#define ETH_FLAG_NTUPLE NETIF_F_NTUPLE +#endif + +#ifndef netdev_mc_count +#define netdev_mc_count(dev) ((dev)->mc_count) +#endif +#ifndef netdev_mc_empty +#define netdev_mc_empty(dev) (netdev_mc_count(dev) == 0) +#endif +#ifndef netdev_for_each_mc_addr +#define netdev_for_each_mc_addr(mclist, dev) \ + for (mclist = dev->mc_list; mclist; mclist = mclist->next) +#endif +#ifndef netdev_uc_count +#define netdev_uc_count(dev) ((dev)->uc.count) +#endif +#ifndef netdev_uc_empty +#define netdev_uc_empty(dev) (netdev_uc_count(dev) == 0) +#endif +#ifndef netdev_for_each_uc_addr +#define netdev_for_each_uc_addr(ha, dev) \ + list_for_each_entry(ha, &dev->uc.list, list) +#endif +#ifndef dma_set_coherent_mask +#define dma_set_coherent_mask(dev,mask) \ + pci_set_consistent_dma_mask(to_pci_dev(dev),(mask)) +#endif +#ifndef pci_dev_run_wake +#define pci_dev_run_wake(pdev) (0) +#endif + +/* netdev logging taken from include/linux/netdevice.h */ +#ifndef netdev_name +static inline const char *_kc_netdev_name(const struct net_device *dev) +{ + if (dev->reg_state != NETREG_REGISTERED) + return "(unregistered net_device)"; + return dev->name; +} +#define netdev_name(netdev) _kc_netdev_name(netdev) +#endif /* netdev_name */ + +#undef netdev_printk +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) ) +#define netdev_printk(level, netdev, format, args...) \ +do { \ + struct pci_dev *pdev = _kc_netdev_to_pdev(netdev); \ + printk(level "%s: " format, pci_name(pdev), ##args); \ +} while(0) +#elif ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) ) +#define netdev_printk(level, netdev, format, args...) \ +do { \ + struct pci_dev *pdev = _kc_netdev_to_pdev(netdev); \ + struct device *dev = pci_dev_to_dev(pdev); \ + dev_printk(level, dev, "%s: " format, \ + netdev_name(netdev), ##args); \ +} while(0) +#else /* 2.6.21 => 2.6.34 */ +#define netdev_printk(level, netdev, format, args...) \ + dev_printk(level, (netdev)->dev.parent, \ + "%s: " format, \ + netdev_name(netdev), ##args) +#endif /* <2.6.0 <2.6.21 <2.6.34 */ +#undef netdev_emerg +#define netdev_emerg(dev, format, args...) \ + netdev_printk(KERN_EMERG, dev, format, ##args) +#undef netdev_alert +#define netdev_alert(dev, format, args...) \ + netdev_printk(KERN_ALERT, dev, format, ##args) +#undef netdev_crit +#define netdev_crit(dev, format, args...) \ + netdev_printk(KERN_CRIT, dev, format, ##args) +#undef netdev_err +#define netdev_err(dev, format, args...) \ + netdev_printk(KERN_ERR, dev, format, ##args) +#undef netdev_warn +#define netdev_warn(dev, format, args...) \ + netdev_printk(KERN_WARNING, dev, format, ##args) +#undef netdev_notice +#define netdev_notice(dev, format, args...) \ + netdev_printk(KERN_NOTICE, dev, format, ##args) +#undef netdev_info +#define netdev_info(dev, format, args...) \ + netdev_printk(KERN_INFO, dev, format, ##args) +#undef netdev_dbg +#if defined(DEBUG) +#define netdev_dbg(__dev, format, args...) \ + netdev_printk(KERN_DEBUG, __dev, format, ##args) +#elif defined(CONFIG_DYNAMIC_DEBUG) +#define netdev_dbg(__dev, format, args...) \ +do { \ + dynamic_dev_dbg((__dev)->dev.parent, "%s: " format, \ + netdev_name(__dev), ##args); \ +} while (0) +#else /* DEBUG */ +#define netdev_dbg(__dev, format, args...) \ +({ \ + if (0) \ + netdev_printk(KERN_DEBUG, __dev, format, ##args); \ + 0; \ +}) +#endif /* DEBUG */ + +#undef netif_printk +#define netif_printk(priv, type, level, dev, fmt, args...) \ +do { \ + if (netif_msg_##type(priv)) \ + netdev_printk(level, (dev), fmt, ##args); \ +} while (0) + +#undef netif_emerg +#define netif_emerg(priv, type, dev, fmt, args...) \ + netif_level(emerg, priv, type, dev, fmt, ##args) +#undef netif_alert +#define netif_alert(priv, type, dev, fmt, args...) \ + netif_level(alert, priv, type, dev, fmt, ##args) +#undef netif_crit +#define netif_crit(priv, type, dev, fmt, args...) \ + netif_level(crit, priv, type, dev, fmt, ##args) +#undef netif_err +#define netif_err(priv, type, dev, fmt, args...) \ + netif_level(err, priv, type, dev, fmt, ##args) +#undef netif_warn +#define netif_warn(priv, type, dev, fmt, args...) \ + netif_level(warn, priv, type, dev, fmt, ##args) +#undef netif_notice +#define netif_notice(priv, type, dev, fmt, args...) \ + netif_level(notice, priv, type, dev, fmt, ##args) +#undef netif_info +#define netif_info(priv, type, dev, fmt, args...) \ + netif_level(info, priv, type, dev, fmt, ##args) +#undef netif_dbg +#define netif_dbg(priv, type, dev, fmt, args...) \ + netif_level(dbg, priv, type, dev, fmt, ##args) + +#ifdef SET_SYSTEM_SLEEP_PM_OPS +#define HAVE_SYSTEM_SLEEP_PM_OPS +#endif + +#ifndef for_each_set_bit +#define for_each_set_bit(bit, addr, size) \ + for ((bit) = find_first_bit((addr), (size)); \ + (bit) < (size); \ + (bit) = find_next_bit((addr), (size), (bit) + 1)) +#endif /* for_each_set_bit */ + +#ifndef DEFINE_DMA_UNMAP_ADDR +#define DEFINE_DMA_UNMAP_ADDR DECLARE_PCI_UNMAP_ADDR +#define DEFINE_DMA_UNMAP_LEN DECLARE_PCI_UNMAP_LEN +#define dma_unmap_addr pci_unmap_addr +#define dma_unmap_addr_set pci_unmap_addr_set +#define dma_unmap_len pci_unmap_len +#define dma_unmap_len_set pci_unmap_len_set +#endif /* DEFINE_DMA_UNMAP_ADDR */ + +#if (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(6,3)) +#ifdef IGB_HWMON +#ifdef CONFIG_DEBUG_LOCK_ALLOC +#define sysfs_attr_init(attr) \ + do { \ + static struct lock_class_key __key; \ + (attr)->key = &__key; \ + } while (0) +#else +#define sysfs_attr_init(attr) do {} while (0) +#endif /* CONFIG_DEBUG_LOCK_ALLOC */ +#endif /* IGB_HWMON */ +#endif /* RHEL_RELEASE_CODE */ + +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) ) +static inline bool _kc_pm_runtime_suspended() +{ + return false; +} +#define pm_runtime_suspended(dev) _kc_pm_runtime_suspended() +#else /* 2.6.0 => 2.6.34 */ +static inline bool _kc_pm_runtime_suspended(struct device *dev) +{ + return false; +} +#ifndef pm_runtime_suspended +#define pm_runtime_suspended(dev) _kc_pm_runtime_suspended(dev) +#endif +#endif /* 2.6.0 => 2.6.34 */ + +#else /* < 2.6.34 */ +#define HAVE_SYSTEM_SLEEP_PM_OPS +#ifndef HAVE_SET_RX_MODE +#define HAVE_SET_RX_MODE +#endif + +#endif /* < 2.6.34 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35) ) + +ssize_t _kc_simple_write_to_buffer(void *to, size_t available, loff_t *ppos, + const void __user *from, size_t count); +#define simple_write_to_buffer _kc_simple_write_to_buffer + +#ifndef numa_node_id +#define numa_node_id() 0 +#endif +#ifdef HAVE_TX_MQ +#include +#ifndef CONFIG_NETDEVICES_MULTIQUEUE +#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0))) +void _kc_netif_set_real_num_tx_queues(struct net_device *, unsigned int); +#define netif_set_real_num_tx_queues _kc_netif_set_real_num_tx_queues +#endif /* !(RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0)) */ +#else /* CONFIG_NETDEVICES_MULTI_QUEUE */ +#define netif_set_real_num_tx_queues(_netdev, _count) \ + do { \ + (_netdev)->egress_subqueue_count = _count; \ + } while (0) +#endif /* CONFIG_NETDEVICES_MULTI_QUEUE */ +#else /* HAVE_TX_MQ */ +#define netif_set_real_num_tx_queues(_netdev, _count) do {} while(0) +#endif /* HAVE_TX_MQ */ +#ifndef ETH_FLAG_RXHASH +#define ETH_FLAG_RXHASH (1<<28) +#endif /* ETH_FLAG_RXHASH */ +#if (RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0)) +#define HAVE_IRQ_AFFINITY_HINT +#endif +#else /* < 2.6.35 */ +#define HAVE_PM_QOS_REQUEST_LIST +#define HAVE_IRQ_AFFINITY_HINT +#endif /* < 2.6.35 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) ) +extern int _kc_ethtool_op_set_flags(struct net_device *, u32, u32); +#define ethtool_op_set_flags _kc_ethtool_op_set_flags +extern u32 _kc_ethtool_op_get_flags(struct net_device *); +#define ethtool_op_get_flags _kc_ethtool_op_get_flags + +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS +#ifdef NET_IP_ALIGN +#undef NET_IP_ALIGN +#endif +#define NET_IP_ALIGN 0 +#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ + +#ifdef NET_SKB_PAD +#undef NET_SKB_PAD +#endif + +#if (L1_CACHE_BYTES > 32) +#define NET_SKB_PAD L1_CACHE_BYTES +#else +#define NET_SKB_PAD 32 +#endif + +static inline struct sk_buff *_kc_netdev_alloc_skb_ip_align(struct net_device *dev, + unsigned int length) +{ + struct sk_buff *skb; + + skb = alloc_skb(length + NET_SKB_PAD + NET_IP_ALIGN, GFP_ATOMIC); + if (skb) { +#if (NET_IP_ALIGN + NET_SKB_PAD) + skb_reserve(skb, NET_IP_ALIGN + NET_SKB_PAD); +#endif + skb->dev = dev; + } + return skb; +} + +#ifdef netdev_alloc_skb_ip_align +#undef netdev_alloc_skb_ip_align +#endif +#define netdev_alloc_skb_ip_align(n, l) _kc_netdev_alloc_skb_ip_align(n, l) + +#undef netif_level +#define netif_level(level, priv, type, dev, fmt, args...) \ +do { \ + if (netif_msg_##type(priv)) \ + netdev_##level(dev, fmt, ##args); \ +} while (0) + +#undef usleep_range +#define usleep_range(min, max) msleep(DIV_ROUND_UP(min, 1000)) + +#define u64_stats_update_begin(a) do { } while(0) +#define u64_stats_update_end(a) do { } while(0) +#define u64_stats_fetch_begin(a) do { } while(0) +#define u64_stats_fetch_retry_bh(a) (0) +#define u64_stats_fetch_begin_bh(a) (0) + +#if (RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,1)) +#define HAVE_8021P_SUPPORT +#endif + +#else /* < 2.6.36 */ + + +#define HAVE_PM_QOS_REQUEST_ACTIVE +#define HAVE_8021P_SUPPORT +#define HAVE_NDO_GET_STATS64 +#endif /* < 2.6.36 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) ) +#ifndef netif_set_real_num_rx_queues +static inline int __kc_netif_set_real_num_rx_queues(struct net_device *dev, + unsigned int rxq) +{ + return 0; +} +#define netif_set_real_num_rx_queues(dev, rxq) \ + __kc_netif_set_real_num_rx_queues((dev), (rxq)) +#endif +#ifndef ETHTOOL_RXNTUPLE_ACTION_CLEAR +#define ETHTOOL_RXNTUPLE_ACTION_CLEAR (-2) +#endif +#ifndef VLAN_N_VID +#define VLAN_N_VID VLAN_GROUP_ARRAY_LEN +#endif /* VLAN_N_VID */ +#ifndef ETH_FLAG_TXVLAN +#define ETH_FLAG_TXVLAN (1 << 7) +#endif /* ETH_FLAG_TXVLAN */ +#ifndef ETH_FLAG_RXVLAN +#define ETH_FLAG_RXVLAN (1 << 8) +#endif /* ETH_FLAG_RXVLAN */ + +static inline void _kc_skb_checksum_none_assert(struct sk_buff *skb) +{ + WARN_ON(skb->ip_summed != CHECKSUM_NONE); +} +#define skb_checksum_none_assert(skb) _kc_skb_checksum_none_assert(skb) + +static inline void *_kc_vzalloc_node(unsigned long size, int node) +{ + void *addr = vmalloc_node(size, node); + if (addr) + memset(addr, 0, size); + return addr; +} +#define vzalloc_node(_size, _node) _kc_vzalloc_node(_size, _node) + +static inline void *_kc_vzalloc(unsigned long size) +{ + void *addr = vmalloc(size); + if (addr) + memset(addr, 0, size); + return addr; +} +#define vzalloc(_size) _kc_vzalloc(_size) + +#ifndef vlan_get_protocol +static inline __be16 __kc_vlan_get_protocol(const struct sk_buff *skb) +{ + if (vlan_tx_tag_present(skb) || + skb->protocol != cpu_to_be16(ETH_P_8021Q)) + return skb->protocol; + + if (skb_headlen(skb) < sizeof(struct vlan_ethhdr)) + return 0; + + return ((struct vlan_ethhdr*)skb->data)->h_vlan_encapsulated_proto; +} +#define vlan_get_protocol(_skb) __kc_vlan_get_protocol(_skb) +#endif +#ifdef HAVE_HW_TIME_STAMP +#define SKBTX_HW_TSTAMP (1 << 0) +#define SKBTX_IN_PROGRESS (1 << 2) +#define SKB_SHARED_TX_IS_UNION +#endif + +#ifndef device_wakeup_enable +#define device_wakeup_enable(dev) device_set_wakeup_enable(dev, true) +#endif + +#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,4,18) ) +#ifndef HAVE_VLAN_RX_REGISTER +#define HAVE_VLAN_RX_REGISTER +#endif +#endif /* > 2.4.18 */ +#endif /* < 2.6.37 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,38) ) +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) ) +#define skb_checksum_start_offset(skb) skb_transport_offset(skb) +#else /* 2.6.22 -> 2.6.37 */ +static inline int _kc_skb_checksum_start_offset(const struct sk_buff *skb) +{ + return skb->csum_start - skb_headroom(skb); +} +#define skb_checksum_start_offset(skb) _kc_skb_checksum_start_offset(skb) +#endif /* 2.6.22 -> 2.6.37 */ +#ifdef CONFIG_DCB +#ifndef IEEE_8021QAZ_MAX_TCS +#define IEEE_8021QAZ_MAX_TCS 8 +#endif +#ifndef DCB_CAP_DCBX_HOST +#define DCB_CAP_DCBX_HOST 0x01 +#endif +#ifndef DCB_CAP_DCBX_LLD_MANAGED +#define DCB_CAP_DCBX_LLD_MANAGED 0x02 +#endif +#ifndef DCB_CAP_DCBX_VER_CEE +#define DCB_CAP_DCBX_VER_CEE 0x04 +#endif +#ifndef DCB_CAP_DCBX_VER_IEEE +#define DCB_CAP_DCBX_VER_IEEE 0x08 +#endif +#ifndef DCB_CAP_DCBX_STATIC +#define DCB_CAP_DCBX_STATIC 0x10 +#endif +#endif /* CONFIG_DCB */ +#if (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,2)) +#define CONFIG_XPS +#endif /* RHEL_RELEASE_VERSION(6,2) */ +#endif /* < 2.6.38 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39) ) +#ifndef NETIF_F_RXCSUM +#define NETIF_F_RXCSUM (1 << 29) +#endif +#ifndef skb_queue_reverse_walk_safe +#define skb_queue_reverse_walk_safe(queue, skb, tmp) \ + for (skb = (queue)->prev, tmp = skb->prev; \ + skb != (struct sk_buff *)(queue); \ + skb = tmp, tmp = skb->prev) +#endif +#else /* < 2.6.39 */ +#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) +#ifndef HAVE_NETDEV_OPS_FCOE_DDP_TARGET +#define HAVE_NETDEV_OPS_FCOE_DDP_TARGET +#endif +#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */ +#ifndef HAVE_MQPRIO +#define HAVE_MQPRIO +#endif +#ifndef HAVE_SETUP_TC +#define HAVE_SETUP_TC +#endif +#ifdef CONFIG_DCB +#ifndef HAVE_DCBNL_IEEE +#define HAVE_DCBNL_IEEE +#endif +#endif /* CONFIG_DCB */ +#ifndef HAVE_NDO_SET_FEATURES +#define HAVE_NDO_SET_FEATURES +#endif +#endif /* < 2.6.39 */ + +/*****************************************************************************/ +/* use < 2.6.40 because of a Fedora 15 kernel update where they + * updated the kernel version to 2.6.40.x and they back-ported 3.0 features + * like set_phys_id for ethtool. + */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,40) ) +#ifdef ETHTOOL_GRXRINGS +#ifndef FLOW_EXT +#define FLOW_EXT 0x80000000 +union _kc_ethtool_flow_union { + struct ethtool_tcpip4_spec tcp_ip4_spec; + struct ethtool_usrip4_spec usr_ip4_spec; + __u8 hdata[60]; +}; +struct _kc_ethtool_flow_ext { + __be16 vlan_etype; + __be16 vlan_tci; + __be32 data[2]; +}; +struct _kc_ethtool_rx_flow_spec { + __u32 flow_type; + union _kc_ethtool_flow_union h_u; + struct _kc_ethtool_flow_ext h_ext; + union _kc_ethtool_flow_union m_u; + struct _kc_ethtool_flow_ext m_ext; + __u64 ring_cookie; + __u32 location; +}; +#define ethtool_rx_flow_spec _kc_ethtool_rx_flow_spec +#endif /* FLOW_EXT */ +#endif + +#define pci_disable_link_state_locked pci_disable_link_state + +#ifndef PCI_LTR_VALUE_MASK +#define PCI_LTR_VALUE_MASK 0x000003ff +#endif +#ifndef PCI_LTR_SCALE_MASK +#define PCI_LTR_SCALE_MASK 0x00001c00 +#endif +#ifndef PCI_LTR_SCALE_SHIFT +#define PCI_LTR_SCALE_SHIFT 10 +#endif + +#else /* < 2.6.40 */ +#define HAVE_ETHTOOL_SET_PHYS_ID +#endif /* < 2.6.40 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,0,0) ) +#define USE_LEGACY_PM_SUPPORT +#endif /* < 3.0.0 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,1,0) ) +#ifndef __netdev_alloc_skb_ip_align +#define __netdev_alloc_skb_ip_align(d,l,_g) netdev_alloc_skb_ip_align(d,l) +#endif /* __netdev_alloc_skb_ip_align */ +#define dcb_ieee_setapp(dev, app) dcb_setapp(dev, app) +#define dcb_ieee_delapp(dev, app) 0 +#define dcb_ieee_getapp_mask(dev, app) (1 << app->priority) + +/* 1000BASE-T Control register */ +#define CTL1000_AS_MASTER 0x0800 +#define CTL1000_ENABLE_MASTER 0x1000 + +#else /* < 3.1.0 */ +#ifndef HAVE_DCBNL_IEEE_DELAPP +#define HAVE_DCBNL_IEEE_DELAPP +#endif +#endif /* < 3.1.0 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,2,0) ) +#ifdef ETHTOOL_GRXRINGS +#define HAVE_ETHTOOL_GET_RXNFC_VOID_RULE_LOCS +#endif /* ETHTOOL_GRXRINGS */ + +#ifndef skb_frag_size +#define skb_frag_size(frag) _kc_skb_frag_size(frag) +static inline unsigned int _kc_skb_frag_size(const skb_frag_t *frag) +{ + return frag->size; +} +#endif /* skb_frag_size */ + +#ifndef skb_frag_size_sub +#define skb_frag_size_sub(frag, delta) _kc_skb_frag_size_sub(frag, delta) +static inline void _kc_skb_frag_size_sub(skb_frag_t *frag, int delta) +{ + frag->size -= delta; +} +#endif /* skb_frag_size_sub */ + +#ifndef skb_frag_page +#define skb_frag_page(frag) _kc_skb_frag_page(frag) +static inline struct page *_kc_skb_frag_page(const skb_frag_t *frag) +{ + return frag->page; +} +#endif /* skb_frag_page */ + +#ifndef skb_frag_address +#define skb_frag_address(frag) _kc_skb_frag_address(frag) +static inline void *_kc_skb_frag_address(const skb_frag_t *frag) +{ + return page_address(skb_frag_page(frag)) + frag->page_offset; +} +#endif /* skb_frag_address */ + +#ifndef skb_frag_dma_map +#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) ) +#include +#endif +#define skb_frag_dma_map(dev,frag,offset,size,dir) \ + _kc_skb_frag_dma_map(dev,frag,offset,size,dir) +static inline dma_addr_t _kc_skb_frag_dma_map(struct device *dev, + const skb_frag_t *frag, + size_t offset, size_t size, + enum dma_data_direction dir) +{ + return dma_map_page(dev, skb_frag_page(frag), + frag->page_offset + offset, size, dir); +} +#endif /* skb_frag_dma_map */ + +#ifndef __skb_frag_unref +#define __skb_frag_unref(frag) __kc_skb_frag_unref(frag) +static inline void __kc_skb_frag_unref(skb_frag_t *frag) +{ + put_page(skb_frag_page(frag)); +} +#endif /* __skb_frag_unref */ + +#ifndef SPEED_UNKNOWN +#define SPEED_UNKNOWN -1 +#endif +#ifndef DUPLEX_UNKNOWN +#define DUPLEX_UNKNOWN 0xff +#endif +#if (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,3)) +#ifndef HAVE_PCI_DEV_FLAGS_ASSIGNED +#define HAVE_PCI_DEV_FLAGS_ASSIGNED +#endif +#endif +#else /* < 3.2.0 */ +#ifndef HAVE_PCI_DEV_FLAGS_ASSIGNED +#define HAVE_PCI_DEV_FLAGS_ASSIGNED +#define HAVE_VF_SPOOFCHK_CONFIGURE +#endif +#endif /* < 3.2.0 */ + +#if (RHEL_RELEASE_CODE && RHEL_RELEASE_CODE == RHEL_RELEASE_VERSION(6,2)) +#undef ixgbe_get_netdev_tc_txq +#define ixgbe_get_netdev_tc_txq(dev, tc) (&netdev_extended(dev)->qos_data.tc_to_txq[tc]) +#endif +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,3,0) ) +typedef u32 kni_netdev_features_t; +#undef PCI_EXP_TYPE_RC_EC +#define PCI_EXP_TYPE_RC_EC 0xa /* Root Complex Event Collector */ +#ifndef CONFIG_BQL +#define netdev_tx_completed_queue(_q, _p, _b) do {} while (0) +#define netdev_completed_queue(_n, _p, _b) do {} while (0) +#define netdev_tx_sent_queue(_q, _b) do {} while (0) +#define netdev_sent_queue(_n, _b) do {} while (0) +#define netdev_tx_reset_queue(_q) do {} while (0) +#define netdev_reset_queue(_n) do {} while (0) +#endif +#else /* ! < 3.3.0 */ +typedef netdev_features_t kni_netdev_features_t; +#define HAVE_INT_NDO_VLAN_RX_ADD_VID +#ifdef ETHTOOL_SRXNTUPLE +#undef ETHTOOL_SRXNTUPLE +#endif +#endif /* < 3.3.0 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0) ) +#ifndef NETIF_F_RXFCS +#define NETIF_F_RXFCS 0 +#endif /* NETIF_F_RXFCS */ +#ifndef NETIF_F_RXALL +#define NETIF_F_RXALL 0 +#endif /* NETIF_F_RXALL */ + +#if !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0)) +#define NUMTCS_RETURNS_U8 + +int _kc_simple_open(struct inode *inode, struct file *file); +#define simple_open _kc_simple_open +#endif /* !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0)) */ + + +#ifndef skb_add_rx_frag +#define skb_add_rx_frag _kc_skb_add_rx_frag +extern void _kc_skb_add_rx_frag(struct sk_buff *, int, struct page *, + int, int, unsigned int); +#endif +#ifdef NET_ADDR_RANDOM +#define eth_hw_addr_random(N) do { \ + random_ether_addr(N->dev_addr); \ + N->addr_assign_type |= NET_ADDR_RANDOM; \ + } while (0) +#else /* NET_ADDR_RANDOM */ +#define eth_hw_addr_random(N) random_ether_addr(N->dev_addr) +#endif /* NET_ADDR_RANDOM */ +#else /* < 3.4.0 */ +#include +#endif /* >= 3.4.0 */ + +/*****************************************************************************/ +#if defined(E1000E_PTP) || defined(IGB_PTP) || defined(IXGBE_PTP) || defined(I40E_PTP) +#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(3,0,0) ) && IS_ENABLED(CONFIG_PTP_1588_CLOCK) +#define HAVE_PTP_1588_CLOCK +#else +#error Cannot enable PTP Hardware Clock support due to a pre-3.0 kernel version or CONFIG_PTP_1588_CLOCK not enabled in the kernel +#endif /* > 3.0.0 && IS_ENABLED(CONFIG_PTP_1588_CLOCK) */ +#endif /* E1000E_PTP || IGB_PTP || IXGBE_PTP || I40E_PTP */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0) ) +#define skb_tx_timestamp(skb) do {} while (0) +static inline bool __kc_ether_addr_equal(const u8 *addr1, const u8 *addr2) +{ + return !compare_ether_addr(addr1, addr2); +} +#define ether_addr_equal(_addr1, _addr2) __kc_ether_addr_equal((_addr1),(_addr2)) +#else +#define HAVE_FDB_OPS +#define HAVE_ETHTOOL_GET_TS_INFO +#endif /* < 3.5.0 */ + +/*****************************************************************************/ +#include +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,6,0) ) +#define PCI_EXP_LNKCAP2 44 /* Link Capability 2 */ + +#ifndef MDIO_EEE_100TX +#define MDIO_EEE_100TX 0x0002 /* 100TX EEE cap */ +#endif +#ifndef MDIO_EEE_1000T +#define MDIO_EEE_1000T 0x0004 /* 1000T EEE cap */ +#endif +#ifndef MDIO_EEE_10GT +#define MDIO_EEE_10GT 0x0008 /* 10GT EEE cap */ +#endif +#ifndef MDIO_EEE_1000KX +#define MDIO_EEE_1000KX 0x0010 /* 1000KX EEE cap */ +#endif +#ifndef MDIO_EEE_10GKX4 +#define MDIO_EEE_10GKX4 0x0020 /* 10G KX4 EEE cap */ +#endif +#ifndef MDIO_EEE_10GKR +#define MDIO_EEE_10GKR 0x0040 /* 10G KR EEE cap */ +#endif +#endif /* < 3.6.0 */ + +/******************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,7,0) ) +#ifndef ADVERTISED_40000baseKR4_Full +/* these defines were all added in one commit, so should be safe + * to trigger activiation on one define + */ +#define SUPPORTED_40000baseKR4_Full (1 << 23) +#define SUPPORTED_40000baseCR4_Full (1 << 24) +#define SUPPORTED_40000baseSR4_Full (1 << 25) +#define SUPPORTED_40000baseLR4_Full (1 << 26) +#define ADVERTISED_40000baseKR4_Full (1 << 23) +#define ADVERTISED_40000baseCR4_Full (1 << 24) +#define ADVERTISED_40000baseSR4_Full (1 << 25) +#define ADVERTISED_40000baseLR4_Full (1 << 26) +#endif + +/** + * mmd_eee_cap_to_ethtool_sup_t + * @eee_cap: value of the MMD EEE Capability register + * + * A small helper function that translates MMD EEE Capability (3.20) bits + * to ethtool supported settings. + */ +static inline u32 __kc_mmd_eee_cap_to_ethtool_sup_t(u16 eee_cap) +{ + u32 supported = 0; + + if (eee_cap & MDIO_EEE_100TX) + supported |= SUPPORTED_100baseT_Full; + if (eee_cap & MDIO_EEE_1000T) + supported |= SUPPORTED_1000baseT_Full; + if (eee_cap & MDIO_EEE_10GT) + supported |= SUPPORTED_10000baseT_Full; + if (eee_cap & MDIO_EEE_1000KX) + supported |= SUPPORTED_1000baseKX_Full; + if (eee_cap & MDIO_EEE_10GKX4) + supported |= SUPPORTED_10000baseKX4_Full; + if (eee_cap & MDIO_EEE_10GKR) + supported |= SUPPORTED_10000baseKR_Full; + + return supported; +} +#define mmd_eee_cap_to_ethtool_sup_t(eee_cap) \ + __kc_mmd_eee_cap_to_ethtool_sup_t(eee_cap) + +/** + * mmd_eee_adv_to_ethtool_adv_t + * @eee_adv: value of the MMD EEE Advertisement/Link Partner Ability registers + * + * A small helper function that translates the MMD EEE Advertisement (7.60) + * and MMD EEE Link Partner Ability (7.61) bits to ethtool advertisement + * settings. + */ +static inline u32 __kc_mmd_eee_adv_to_ethtool_adv_t(u16 eee_adv) +{ + u32 adv = 0; + + if (eee_adv & MDIO_EEE_100TX) + adv |= ADVERTISED_100baseT_Full; + if (eee_adv & MDIO_EEE_1000T) + adv |= ADVERTISED_1000baseT_Full; + if (eee_adv & MDIO_EEE_10GT) + adv |= ADVERTISED_10000baseT_Full; + if (eee_adv & MDIO_EEE_1000KX) + adv |= ADVERTISED_1000baseKX_Full; + if (eee_adv & MDIO_EEE_10GKX4) + adv |= ADVERTISED_10000baseKX4_Full; + if (eee_adv & MDIO_EEE_10GKR) + adv |= ADVERTISED_10000baseKR_Full; + + return adv; +} +#define mmd_eee_adv_to_ethtool_adv_t(eee_adv) \ + __kc_mmd_eee_adv_to_ethtool_adv_t(eee_adv) + +/** + * ethtool_adv_to_mmd_eee_adv_t + * @adv: the ethtool advertisement settings + * + * A small helper function that translates ethtool advertisement settings + * to EEE advertisements for the MMD EEE Advertisement (7.60) and + * MMD EEE Link Partner Ability (7.61) registers. + */ +static inline u16 __kc_ethtool_adv_to_mmd_eee_adv_t(u32 adv) +{ + u16 reg = 0; + + if (adv & ADVERTISED_100baseT_Full) + reg |= MDIO_EEE_100TX; + if (adv & ADVERTISED_1000baseT_Full) + reg |= MDIO_EEE_1000T; + if (adv & ADVERTISED_10000baseT_Full) + reg |= MDIO_EEE_10GT; + if (adv & ADVERTISED_1000baseKX_Full) + reg |= MDIO_EEE_1000KX; + if (adv & ADVERTISED_10000baseKX4_Full) + reg |= MDIO_EEE_10GKX4; + if (adv & ADVERTISED_10000baseKR_Full) + reg |= MDIO_EEE_10GKR; + + return reg; +} +#define ethtool_adv_to_mmd_eee_adv_t(adv) \ + __kc_ethtool_adv_to_mmd_eee_adv_t(adv) + +#ifndef pci_pcie_type +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) ) +static inline u8 pci_pcie_type(struct pci_dev *pdev) +{ + int pos; + u16 reg16; + + pos = pci_find_capability(pdev, PCI_CAP_ID_EXP); + if (!pos) + BUG(); + pci_read_config_word(pdev, pos + PCI_EXP_FLAGS, ®16); + return (reg16 & PCI_EXP_FLAGS_TYPE) >> 4; +} +#else /* < 2.6.24 */ +#define pci_pcie_type(x) (x)->pcie_type +#endif /* < 2.6.24 */ +#endif /* pci_pcie_type */ + +#define ptp_clock_register(caps, args...) ptp_clock_register(caps) + +#ifndef PCI_EXP_LNKSTA2 +int __kc_pcie_capability_read_word(struct pci_dev *dev, int pos, u16 *val); +#define pcie_capability_read_word(d,p,v) __kc_pcie_capability_read_word(d,p,v) +int __kc_pcie_capability_write_word(struct pci_dev *dev, int pos, u16 val); +#define pcie_capability_write_word(d,p,v) __kc_pcie_capability_write_word(d,p,v) +int __kc_pcie_capability_clear_and_set_word(struct pci_dev *dev, int pos, + u16 clear, u16 set); +#define pcie_capability_clear_and_set_word(d,p,c,s) \ + __kc_pcie_capability_clear_and_set_word(d,p,c,s) + +#define PCI_EXP_LNKSTA2 50 /* Link Status 2 */ + +static inline int pcie_capability_clear_word(struct pci_dev *dev, int pos, + u16 clear) +{ + return __kc_pcie_capability_clear_and_set_word(dev, pos, clear, 0); +} +#endif /* !PCI_EXP_LNKSTA2 */ + +#if (SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0)) +#define USE_CONST_DEV_UC_CHAR +#endif + +#else /* >= 3.7.0 */ +#define HAVE_CONST_STRUCT_PCI_ERROR_HANDLERS +#define USE_CONST_DEV_UC_CHAR +#endif /* >= 3.7.0 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,8,0) ) +#ifndef PCI_EXP_LNKCTL_ASPM_L0S +#define PCI_EXP_LNKCTL_ASPM_L0S 0x01 /* L0s Enable */ +#endif +#ifndef PCI_EXP_LNKCTL_ASPM_L1 +#define PCI_EXP_LNKCTL_ASPM_L1 0x02 /* L1 Enable */ +#endif +#define HAVE_CONFIG_HOTPLUG +/* Reserved Ethernet Addresses per IEEE 802.1Q */ +static const u8 eth_reserved_addr_base[ETH_ALEN] __aligned(2) = { + 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }; +#if !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0)) &&\ + !(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,5)) +static inline bool is_link_local_ether_addr(const u8 *addr) +{ + __be16 *a = (__be16 *)addr; + static const __be16 *b = (const __be16 *)eth_reserved_addr_base; + static const __be16 m = cpu_to_be16(0xfff0); + + return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | ((a[2] ^ b[2]) & m)) == 0; +} +#endif /* !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,3,0)) */ +#else /* >= 3.8.0 */ +#ifndef __devinit +#define __devinit +#define HAVE_ENCAP_CSUM_OFFLOAD +#endif + +#ifndef __devinitdata +#define __devinitdata +#endif + +#ifndef __devexit +#define __devexit +#endif + +#ifndef __devexit_p +#define __devexit_p +#endif + +#ifndef HAVE_SRIOV_CONFIGURE +#define HAVE_SRIOV_CONFIGURE +#endif + +#define HAVE_BRIDGE_ATTRIBS +#ifndef BRIDGE_MODE_VEB +#define BRIDGE_MODE_VEB 0 /* Default loopback mode */ +#endif /* BRIDGE_MODE_VEB */ +#ifndef BRIDGE_MODE_VEPA +#define BRIDGE_MODE_VEPA 1 /* 802.1Qbg defined VEPA mode */ +#endif /* BRIDGE_MODE_VEPA */ +#endif /* >= 3.8.0 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,9,0) ) + +#undef hlist_entry +#define hlist_entry(ptr, type, member) container_of(ptr,type,member) + +#undef hlist_entry_safe +#define hlist_entry_safe(ptr, type, member) \ + (ptr) ? hlist_entry(ptr, type, member) : NULL + +#undef hlist_for_each_entry +#define hlist_for_each_entry(pos, head, member) \ + for (pos = hlist_entry_safe((head)->first, typeof(*(pos)), member); \ + pos; \ + pos = hlist_entry_safe((pos)->member.next, typeof(*(pos)), member)) + +#undef hlist_for_each_entry_safe +#define hlist_for_each_entry_safe(pos, n, head, member) \ + for (pos = hlist_entry_safe((head)->first, typeof(*pos), member); \ + pos && ({ n = pos->member.next; 1; }); \ + pos = hlist_entry_safe(n, typeof(*pos), member)) + +#ifdef CONFIG_XPS +extern int __kc_netif_set_xps_queue(struct net_device *, struct cpumask *, u16); +#define netif_set_xps_queue(_dev, _mask, _idx) __kc_netif_set_xps_queue((_dev), (_mask), (_idx)) +#else /* CONFIG_XPS */ +#define netif_set_xps_queue(_dev, _mask, _idx) do {} while (0) +#endif /* CONFIG_XPS */ + +#ifdef HAVE_NETDEV_SELECT_QUEUE +#define _kc_hashrnd 0xd631614b /* not so random hash salt */ +extern u16 __kc_netdev_pick_tx(struct net_device *dev, struct sk_buff *skb); +#define __netdev_pick_tx __kc_netdev_pick_tx +#endif /* HAVE_NETDEV_SELECT_QUEUE */ +#else +#define HAVE_BRIDGE_FILTER +#define USE_DEFAULT_FDB_DEL_DUMP +#endif /* < 3.9.0 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) ) +#ifdef CONFIG_PCI_IOV +extern int __kc_pci_vfs_assigned(struct pci_dev *dev); +#else +static inline int __kc_pci_vfs_assigned(struct pci_dev *dev) +{ + return 0; +} +#endif +#define pci_vfs_assigned(dev) __kc_pci_vfs_assigned(dev) + +#ifndef VLAN_TX_COOKIE_MAGIC +static inline struct sk_buff *__kc__vlan_hwaccel_put_tag(struct sk_buff *skb, + u16 vlan_tci) +{ +#ifdef VLAN_TAG_PRESENT + vlan_tci |= VLAN_TAG_PRESENT; +#endif + skb->vlan_tci = vlan_tci; + return skb; +} +#define __vlan_hwaccel_put_tag(skb, vlan_proto, vlan_tci) \ + __kc__vlan_hwaccel_put_tag(skb, vlan_tci) +#endif + +#else /* >= 3.10.0 */ +#define HAVE_ENCAP_TSO_OFFLOAD +#endif /* >= 3.10.0 */ + +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,14,0) ) +#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,6))) +#if (!(UBUNTU_KERNEL_CODE >= UBUNTU_KERNEL_VERSION(3,13,0,30,0) \ + && (UBUNTU_RELEASE_CODE == UBUNTU_RELEASE_VERSION(12,4) \ + || UBUNTU_RELEASE_CODE == UBUNTU_RELEASE_VERSION(14,4)))) +#if (!(SLE_VERSION_CODE == SLE_VERSION(12,0,0))) +#ifdef NETIF_F_RXHASH +#define PKT_HASH_TYPE_L3 0 +static inline void +skb_set_hash(struct sk_buff *skb, __u32 hash, __always_unused int type) +{ + skb->rxhash = hash; +} +#endif /* NETIF_F_RXHASH */ +#endif /* < SLES12 */ +#endif /* < 3.13.0-30.54 (Ubuntu 14.04) */ +#endif /* < RHEL7 */ +#endif /* < 3.14.0 */ + +#if (( LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0) ) \ + || ( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,2) )) +#undef SET_ETHTOOL_OPS +#define SET_ETHTOOL_OPS(netdev, ops) ((netdev)->ethtool_ops = (ops)) +#define HAVE_VF_MIN_MAX_TXRATE 1 +#endif /* >= 3.16.0 */ + +#if (( LINUX_VERSION_CODE >= KERNEL_VERSION(3,19,0) ) \ + || ( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,2) )) +#define HAVE_NDO_DFLT_BRIDGE_ADD_MASK +#if (!( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,2) )) +#define HAVE_NDO_FDB_ADD_VID +#endif /* !RHEL 7.2 */ +#endif /* >= 3.19.0 */ + +#if (( LINUX_VERSION_CODE >= KERNEL_VERSION(4,0,0) ) \ + || ( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,2) )) +/* vlan_tx_xx functions got renamed to skb_vlan */ +#define vlan_tx_tag_get skb_vlan_tag_get +#define vlan_tx_tag_present skb_vlan_tag_present +#if (!( RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(7,2) )) +#define HAVE_NDO_BRIDGE_SET_DEL_LINK_FLAGS +#endif /* !RHEL 7.2 */ +#endif /* 4.0.0 */ + +#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(4,1,0) ) +/* ndo_bridge_getlink adds new nlflags parameter */ +#define HAVE_NDO_BRIDGE_GETLINK_NLFLAGS +#endif /* >= 4.1.0 */ + +#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(4,2,0) ) +/* ndo_bridge_getlink adds new filter_mask and vlan_fill parameters */ +#define HAVE_NDO_BRIDGE_GETLINK_FILTER_MASK_VLAN_FILL +#endif /* >= 4.2.0 */ +#endif /* _KCOMPAT_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat_ethtool.c b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat_ethtool.c new file mode 100644 index 00000000..e1a89388 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/igb/kcompat_ethtool.c @@ -0,0 +1,1171 @@ +/******************************************************************************* + + Intel(R) Gigabit Ethernet Linux driver + Copyright(c) 2007-2013 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +/* + * net/core/ethtool.c - Ethtool ioctl handler + * Copyright (c) 2003 Matthew Wilcox + * + * This file is where we call all the ethtool_ops commands to get + * the information ethtool needs. We fall back to calling do_ioctl() + * for drivers which haven't been converted to ethtool_ops yet. + * + * It's GPL, stupid. + * + * Modification by sfeldma@pobox.com to work as backward compat + * solution for pre-ethtool_ops kernels. + * - copied struct ethtool_ops from ethtool.h + * - defined SET_ETHTOOL_OPS + * - put in some #ifndef NETIF_F_xxx wrappers + * - changes refs to dev->ethtool_ops to ethtool_ops + * - changed dev_ethtool to ethtool_ioctl + * - remove EXPORT_SYMBOL()s + * - added _kc_ prefix in built-in ethtool_op_xxx ops. + */ + +#include +#include +#include +#include +#include +#include +#include + +#include "kcompat.h" + +#undef SUPPORTED_10000baseT_Full +#define SUPPORTED_10000baseT_Full (1 << 12) +#undef ADVERTISED_10000baseT_Full +#define ADVERTISED_10000baseT_Full (1 << 12) +#undef SPEED_10000 +#define SPEED_10000 10000 + +#undef ethtool_ops +#define ethtool_ops _kc_ethtool_ops + +struct _kc_ethtool_ops { + int (*get_settings)(struct net_device *, struct ethtool_cmd *); + int (*set_settings)(struct net_device *, struct ethtool_cmd *); + void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *); + int (*get_regs_len)(struct net_device *); + void (*get_regs)(struct net_device *, struct ethtool_regs *, void *); + void (*get_wol)(struct net_device *, struct ethtool_wolinfo *); + int (*set_wol)(struct net_device *, struct ethtool_wolinfo *); + u32 (*get_msglevel)(struct net_device *); + void (*set_msglevel)(struct net_device *, u32); + int (*nway_reset)(struct net_device *); + u32 (*get_link)(struct net_device *); + int (*get_eeprom_len)(struct net_device *); + int (*get_eeprom)(struct net_device *, struct ethtool_eeprom *, u8 *); + int (*set_eeprom)(struct net_device *, struct ethtool_eeprom *, u8 *); + int (*get_coalesce)(struct net_device *, struct ethtool_coalesce *); + int (*set_coalesce)(struct net_device *, struct ethtool_coalesce *); + void (*get_ringparam)(struct net_device *, struct ethtool_ringparam *); + int (*set_ringparam)(struct net_device *, struct ethtool_ringparam *); + void (*get_pauseparam)(struct net_device *, + struct ethtool_pauseparam*); + int (*set_pauseparam)(struct net_device *, + struct ethtool_pauseparam*); + u32 (*get_rx_csum)(struct net_device *); + int (*set_rx_csum)(struct net_device *, u32); + u32 (*get_tx_csum)(struct net_device *); + int (*set_tx_csum)(struct net_device *, u32); + u32 (*get_sg)(struct net_device *); + int (*set_sg)(struct net_device *, u32); + u32 (*get_tso)(struct net_device *); + int (*set_tso)(struct net_device *, u32); + int (*self_test_count)(struct net_device *); + void (*self_test)(struct net_device *, struct ethtool_test *, u64 *); + void (*get_strings)(struct net_device *, u32 stringset, u8 *); + int (*phys_id)(struct net_device *, u32); + int (*get_stats_count)(struct net_device *); + void (*get_ethtool_stats)(struct net_device *, struct ethtool_stats *, + u64 *); +} *ethtool_ops = NULL; + +#undef SET_ETHTOOL_OPS +#define SET_ETHTOOL_OPS(netdev, ops) (ethtool_ops = (ops)) + +/* + * Some useful ethtool_ops methods that are device independent. If we find that + * all drivers want to do the same thing here, we can turn these into dev_() + * function calls. + */ + +#undef ethtool_op_get_link +#define ethtool_op_get_link _kc_ethtool_op_get_link +u32 _kc_ethtool_op_get_link(struct net_device *dev) +{ + return netif_carrier_ok(dev) ? 1 : 0; +} + +#undef ethtool_op_get_tx_csum +#define ethtool_op_get_tx_csum _kc_ethtool_op_get_tx_csum +u32 _kc_ethtool_op_get_tx_csum(struct net_device *dev) +{ +#ifdef NETIF_F_IP_CSUM + return (dev->features & NETIF_F_IP_CSUM) != 0; +#else + return 0; +#endif +} + +#undef ethtool_op_set_tx_csum +#define ethtool_op_set_tx_csum _kc_ethtool_op_set_tx_csum +int _kc_ethtool_op_set_tx_csum(struct net_device *dev, u32 data) +{ +#ifdef NETIF_F_IP_CSUM + if (data) +#ifdef NETIF_F_IPV6_CSUM + dev->features |= (NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); + else + dev->features &= ~(NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM); +#else + dev->features |= NETIF_F_IP_CSUM; + else + dev->features &= ~NETIF_F_IP_CSUM; +#endif +#endif + + return 0; +} + +#undef ethtool_op_get_sg +#define ethtool_op_get_sg _kc_ethtool_op_get_sg +u32 _kc_ethtool_op_get_sg(struct net_device *dev) +{ +#ifdef NETIF_F_SG + return (dev->features & NETIF_F_SG) != 0; +#else + return 0; +#endif +} + +#undef ethtool_op_set_sg +#define ethtool_op_set_sg _kc_ethtool_op_set_sg +int _kc_ethtool_op_set_sg(struct net_device *dev, u32 data) +{ +#ifdef NETIF_F_SG + if (data) + dev->features |= NETIF_F_SG; + else + dev->features &= ~NETIF_F_SG; +#endif + + return 0; +} + +#undef ethtool_op_get_tso +#define ethtool_op_get_tso _kc_ethtool_op_get_tso +u32 _kc_ethtool_op_get_tso(struct net_device *dev) +{ +#ifdef NETIF_F_TSO + return (dev->features & NETIF_F_TSO) != 0; +#else + return 0; +#endif +} + +#undef ethtool_op_set_tso +#define ethtool_op_set_tso _kc_ethtool_op_set_tso +int _kc_ethtool_op_set_tso(struct net_device *dev, u32 data) +{ +#ifdef NETIF_F_TSO + if (data) + dev->features |= NETIF_F_TSO; + else + dev->features &= ~NETIF_F_TSO; +#endif + + return 0; +} + +/* Handlers for each ethtool command */ + +static int ethtool_get_settings(struct net_device *dev, void *useraddr) +{ + struct ethtool_cmd cmd = { ETHTOOL_GSET }; + int err; + + if (!ethtool_ops->get_settings) + return -EOPNOTSUPP; + + err = ethtool_ops->get_settings(dev, &cmd); + if (err < 0) + return err; + + if (copy_to_user(useraddr, &cmd, sizeof(cmd))) + return -EFAULT; + return 0; +} + +static int ethtool_set_settings(struct net_device *dev, void *useraddr) +{ + struct ethtool_cmd cmd; + + if (!ethtool_ops->set_settings) + return -EOPNOTSUPP; + + if (copy_from_user(&cmd, useraddr, sizeof(cmd))) + return -EFAULT; + + return ethtool_ops->set_settings(dev, &cmd); +} + +static int ethtool_get_drvinfo(struct net_device *dev, void *useraddr) +{ + struct ethtool_drvinfo info; + struct ethtool_ops *ops = ethtool_ops; + + if (!ops->get_drvinfo) + return -EOPNOTSUPP; + + memset(&info, 0, sizeof(info)); + info.cmd = ETHTOOL_GDRVINFO; + ops->get_drvinfo(dev, &info); + + if (ops->self_test_count) + info.testinfo_len = ops->self_test_count(dev); + if (ops->get_stats_count) + info.n_stats = ops->get_stats_count(dev); + if (ops->get_regs_len) + info.regdump_len = ops->get_regs_len(dev); + if (ops->get_eeprom_len) + info.eedump_len = ops->get_eeprom_len(dev); + + if (copy_to_user(useraddr, &info, sizeof(info))) + return -EFAULT; + return 0; +} + +static int ethtool_get_regs(struct net_device *dev, char *useraddr) +{ + struct ethtool_regs regs; + struct ethtool_ops *ops = ethtool_ops; + void *regbuf; + int reglen, ret; + + if (!ops->get_regs || !ops->get_regs_len) + return -EOPNOTSUPP; + + if (copy_from_user(®s, useraddr, sizeof(regs))) + return -EFAULT; + + reglen = ops->get_regs_len(dev); + if (regs.len > reglen) + regs.len = reglen; + + regbuf = kmalloc(reglen, GFP_USER); + if (!regbuf) + return -ENOMEM; + + ops->get_regs(dev, ®s, regbuf); + + ret = -EFAULT; + if (copy_to_user(useraddr, ®s, sizeof(regs))) + goto out; + useraddr += offsetof(struct ethtool_regs, data); + if (copy_to_user(useraddr, regbuf, reglen)) + goto out; + ret = 0; + +out: + kfree(regbuf); + return ret; +} + +static int ethtool_get_wol(struct net_device *dev, char *useraddr) +{ + struct ethtool_wolinfo wol = { ETHTOOL_GWOL }; + + if (!ethtool_ops->get_wol) + return -EOPNOTSUPP; + + ethtool_ops->get_wol(dev, &wol); + + if (copy_to_user(useraddr, &wol, sizeof(wol))) + return -EFAULT; + return 0; +} + +static int ethtool_set_wol(struct net_device *dev, char *useraddr) +{ + struct ethtool_wolinfo wol; + + if (!ethtool_ops->set_wol) + return -EOPNOTSUPP; + + if (copy_from_user(&wol, useraddr, sizeof(wol))) + return -EFAULT; + + return ethtool_ops->set_wol(dev, &wol); +} + +static int ethtool_get_msglevel(struct net_device *dev, char *useraddr) +{ + struct ethtool_value edata = { ETHTOOL_GMSGLVL }; + + if (!ethtool_ops->get_msglevel) + return -EOPNOTSUPP; + + edata.data = ethtool_ops->get_msglevel(dev); + + if (copy_to_user(useraddr, &edata, sizeof(edata))) + return -EFAULT; + return 0; +} + +static int ethtool_set_msglevel(struct net_device *dev, char *useraddr) +{ + struct ethtool_value edata; + + if (!ethtool_ops->set_msglevel) + return -EOPNOTSUPP; + + if (copy_from_user(&edata, useraddr, sizeof(edata))) + return -EFAULT; + + ethtool_ops->set_msglevel(dev, edata.data); + return 0; +} + +static int ethtool_nway_reset(struct net_device *dev) +{ + if (!ethtool_ops->nway_reset) + return -EOPNOTSUPP; + + return ethtool_ops->nway_reset(dev); +} + +static int ethtool_get_link(struct net_device *dev, void *useraddr) +{ + struct ethtool_value edata = { ETHTOOL_GLINK }; + + if (!ethtool_ops->get_link) + return -EOPNOTSUPP; + + edata.data = ethtool_ops->get_link(dev); + + if (copy_to_user(useraddr, &edata, sizeof(edata))) + return -EFAULT; + return 0; +} + +static int ethtool_get_eeprom(struct net_device *dev, void *useraddr) +{ + struct ethtool_eeprom eeprom; + struct ethtool_ops *ops = ethtool_ops; + u8 *data; + int ret; + + if (!ops->get_eeprom || !ops->get_eeprom_len) + return -EOPNOTSUPP; + + if (copy_from_user(&eeprom, useraddr, sizeof(eeprom))) + return -EFAULT; + + /* Check for wrap and zero */ + if (eeprom.offset + eeprom.len <= eeprom.offset) + return -EINVAL; + + /* Check for exceeding total eeprom len */ + if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev)) + return -EINVAL; + + data = kmalloc(eeprom.len, GFP_USER); + if (!data) + return -ENOMEM; + + ret = -EFAULT; + if (copy_from_user(data, useraddr + sizeof(eeprom), eeprom.len)) + goto out; + + ret = ops->get_eeprom(dev, &eeprom, data); + if (ret) + goto out; + + ret = -EFAULT; + if (copy_to_user(useraddr, &eeprom, sizeof(eeprom))) + goto out; + if (copy_to_user(useraddr + sizeof(eeprom), data, eeprom.len)) + goto out; + ret = 0; + +out: + kfree(data); + return ret; +} + +static int ethtool_set_eeprom(struct net_device *dev, void *useraddr) +{ + struct ethtool_eeprom eeprom; + struct ethtool_ops *ops = ethtool_ops; + u8 *data; + int ret; + + if (!ops->set_eeprom || !ops->get_eeprom_len) + return -EOPNOTSUPP; + + if (copy_from_user(&eeprom, useraddr, sizeof(eeprom))) + return -EFAULT; + + /* Check for wrap and zero */ + if (eeprom.offset + eeprom.len <= eeprom.offset) + return -EINVAL; + + /* Check for exceeding total eeprom len */ + if (eeprom.offset + eeprom.len > ops->get_eeprom_len(dev)) + return -EINVAL; + + data = kmalloc(eeprom.len, GFP_USER); + if (!data) + return -ENOMEM; + + ret = -EFAULT; + if (copy_from_user(data, useraddr + sizeof(eeprom), eeprom.len)) + goto out; + + ret = ops->set_eeprom(dev, &eeprom, data); + if (ret) + goto out; + + if (copy_to_user(useraddr + sizeof(eeprom), data, eeprom.len)) + ret = -EFAULT; + +out: + kfree(data); + return ret; +} + +static int ethtool_get_coalesce(struct net_device *dev, void *useraddr) +{ + struct ethtool_coalesce coalesce = { ETHTOOL_GCOALESCE }; + + if (!ethtool_ops->get_coalesce) + return -EOPNOTSUPP; + + ethtool_ops->get_coalesce(dev, &coalesce); + + if (copy_to_user(useraddr, &coalesce, sizeof(coalesce))) + return -EFAULT; + return 0; +} + +static int ethtool_set_coalesce(struct net_device *dev, void *useraddr) +{ + struct ethtool_coalesce coalesce; + + if (!ethtool_ops->get_coalesce) + return -EOPNOTSUPP; + + if (copy_from_user(&coalesce, useraddr, sizeof(coalesce))) + return -EFAULT; + + return ethtool_ops->set_coalesce(dev, &coalesce); +} + +static int ethtool_get_ringparam(struct net_device *dev, void *useraddr) +{ + struct ethtool_ringparam ringparam = { ETHTOOL_GRINGPARAM }; + + if (!ethtool_ops->get_ringparam) + return -EOPNOTSUPP; + + ethtool_ops->get_ringparam(dev, &ringparam); + + if (copy_to_user(useraddr, &ringparam, sizeof(ringparam))) + return -EFAULT; + return 0; +} + +static int ethtool_set_ringparam(struct net_device *dev, void *useraddr) +{ + struct ethtool_ringparam ringparam; + + if (!ethtool_ops->get_ringparam) + return -EOPNOTSUPP; + + if (copy_from_user(&ringparam, useraddr, sizeof(ringparam))) + return -EFAULT; + + return ethtool_ops->set_ringparam(dev, &ringparam); +} + +static int ethtool_get_pauseparam(struct net_device *dev, void *useraddr) +{ + struct ethtool_pauseparam pauseparam = { ETHTOOL_GPAUSEPARAM }; + + if (!ethtool_ops->get_pauseparam) + return -EOPNOTSUPP; + + ethtool_ops->get_pauseparam(dev, &pauseparam); + + if (copy_to_user(useraddr, &pauseparam, sizeof(pauseparam))) + return -EFAULT; + return 0; +} + +static int ethtool_set_pauseparam(struct net_device *dev, void *useraddr) +{ + struct ethtool_pauseparam pauseparam; + + if (!ethtool_ops->get_pauseparam) + return -EOPNOTSUPP; + + if (copy_from_user(&pauseparam, useraddr, sizeof(pauseparam))) + return -EFAULT; + + return ethtool_ops->set_pauseparam(dev, &pauseparam); +} + +static int ethtool_get_rx_csum(struct net_device *dev, char *useraddr) +{ + struct ethtool_value edata = { ETHTOOL_GRXCSUM }; + + if (!ethtool_ops->get_rx_csum) + return -EOPNOTSUPP; + + edata.data = ethtool_ops->get_rx_csum(dev); + + if (copy_to_user(useraddr, &edata, sizeof(edata))) + return -EFAULT; + return 0; +} + +static int ethtool_set_rx_csum(struct net_device *dev, char *useraddr) +{ + struct ethtool_value edata; + + if (!ethtool_ops->set_rx_csum) + return -EOPNOTSUPP; + + if (copy_from_user(&edata, useraddr, sizeof(edata))) + return -EFAULT; + + ethtool_ops->set_rx_csum(dev, edata.data); + return 0; +} + +static int ethtool_get_tx_csum(struct net_device *dev, char *useraddr) +{ + struct ethtool_value edata = { ETHTOOL_GTXCSUM }; + + if (!ethtool_ops->get_tx_csum) + return -EOPNOTSUPP; + + edata.data = ethtool_ops->get_tx_csum(dev); + + if (copy_to_user(useraddr, &edata, sizeof(edata))) + return -EFAULT; + return 0; +} + +static int ethtool_set_tx_csum(struct net_device *dev, char *useraddr) +{ + struct ethtool_value edata; + + if (!ethtool_ops->set_tx_csum) + return -EOPNOTSUPP; + + if (copy_from_user(&edata, useraddr, sizeof(edata))) + return -EFAULT; + + return ethtool_ops->set_tx_csum(dev, edata.data); +} + +static int ethtool_get_sg(struct net_device *dev, char *useraddr) +{ + struct ethtool_value edata = { ETHTOOL_GSG }; + + if (!ethtool_ops->get_sg) + return -EOPNOTSUPP; + + edata.data = ethtool_ops->get_sg(dev); + + if (copy_to_user(useraddr, &edata, sizeof(edata))) + return -EFAULT; + return 0; +} + +static int ethtool_set_sg(struct net_device *dev, char *useraddr) +{ + struct ethtool_value edata; + + if (!ethtool_ops->set_sg) + return -EOPNOTSUPP; + + if (copy_from_user(&edata, useraddr, sizeof(edata))) + return -EFAULT; + + return ethtool_ops->set_sg(dev, edata.data); +} + +static int ethtool_get_tso(struct net_device *dev, char *useraddr) +{ + struct ethtool_value edata = { ETHTOOL_GTSO }; + + if (!ethtool_ops->get_tso) + return -EOPNOTSUPP; + + edata.data = ethtool_ops->get_tso(dev); + + if (copy_to_user(useraddr, &edata, sizeof(edata))) + return -EFAULT; + return 0; +} + +static int ethtool_set_tso(struct net_device *dev, char *useraddr) +{ + struct ethtool_value edata; + + if (!ethtool_ops->set_tso) + return -EOPNOTSUPP; + + if (copy_from_user(&edata, useraddr, sizeof(edata))) + return -EFAULT; + + return ethtool_ops->set_tso(dev, edata.data); +} + +static int ethtool_self_test(struct net_device *dev, char *useraddr) +{ + struct ethtool_test test; + struct ethtool_ops *ops = ethtool_ops; + u64 *data; + int ret; + + if (!ops->self_test || !ops->self_test_count) + return -EOPNOTSUPP; + + if (copy_from_user(&test, useraddr, sizeof(test))) + return -EFAULT; + + test.len = ops->self_test_count(dev); + data = kmalloc(test.len * sizeof(u64), GFP_USER); + if (!data) + return -ENOMEM; + + ops->self_test(dev, &test, data); + + ret = -EFAULT; + if (copy_to_user(useraddr, &test, sizeof(test))) + goto out; + useraddr += sizeof(test); + if (copy_to_user(useraddr, data, test.len * sizeof(u64))) + goto out; + ret = 0; + +out: + kfree(data); + return ret; +} + +static int ethtool_get_strings(struct net_device *dev, void *useraddr) +{ + struct ethtool_gstrings gstrings; + struct ethtool_ops *ops = ethtool_ops; + u8 *data; + int ret; + + if (!ops->get_strings) + return -EOPNOTSUPP; + + if (copy_from_user(&gstrings, useraddr, sizeof(gstrings))) + return -EFAULT; + + switch (gstrings.string_set) { + case ETH_SS_TEST: + if (!ops->self_test_count) + return -EOPNOTSUPP; + gstrings.len = ops->self_test_count(dev); + break; + case ETH_SS_STATS: + if (!ops->get_stats_count) + return -EOPNOTSUPP; + gstrings.len = ops->get_stats_count(dev); + break; + default: + return -EINVAL; + } + + data = kmalloc(gstrings.len * ETH_GSTRING_LEN, GFP_USER); + if (!data) + return -ENOMEM; + + ops->get_strings(dev, gstrings.string_set, data); + + ret = -EFAULT; + if (copy_to_user(useraddr, &gstrings, sizeof(gstrings))) + goto out; + useraddr += sizeof(gstrings); + if (copy_to_user(useraddr, data, gstrings.len * ETH_GSTRING_LEN)) + goto out; + ret = 0; + +out: + kfree(data); + return ret; +} + +static int ethtool_phys_id(struct net_device *dev, void *useraddr) +{ + struct ethtool_value id; + + if (!ethtool_ops->phys_id) + return -EOPNOTSUPP; + + if (copy_from_user(&id, useraddr, sizeof(id))) + return -EFAULT; + + return ethtool_ops->phys_id(dev, id.data); +} + +static int ethtool_get_stats(struct net_device *dev, void *useraddr) +{ + struct ethtool_stats stats; + struct ethtool_ops *ops = ethtool_ops; + u64 *data; + int ret; + + if (!ops->get_ethtool_stats || !ops->get_stats_count) + return -EOPNOTSUPP; + + if (copy_from_user(&stats, useraddr, sizeof(stats))) + return -EFAULT; + + stats.n_stats = ops->get_stats_count(dev); + data = kmalloc(stats.n_stats * sizeof(u64), GFP_USER); + if (!data) + return -ENOMEM; + + ops->get_ethtool_stats(dev, &stats, data); + + ret = -EFAULT; + if (copy_to_user(useraddr, &stats, sizeof(stats))) + goto out; + useraddr += sizeof(stats); + if (copy_to_user(useraddr, data, stats.n_stats * sizeof(u64))) + goto out; + ret = 0; + +out: + kfree(data); + return ret; +} + +/* The main entry point in this file. Called from net/core/dev.c */ + +#define ETHTOOL_OPS_COMPAT +int ethtool_ioctl(struct ifreq *ifr) +{ + struct net_device *dev = __dev_get_by_name(ifr->ifr_name); + void *useraddr = (void *) ifr->ifr_data; + u32 ethcmd; + + /* + * XXX: This can be pushed down into the ethtool_* handlers that + * need it. Keep existing behavior for the moment. + */ + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + if (!dev || !netif_device_present(dev)) + return -ENODEV; + + if (copy_from_user(ðcmd, useraddr, sizeof (ethcmd))) + return -EFAULT; + + switch (ethcmd) { + case ETHTOOL_GSET: + return ethtool_get_settings(dev, useraddr); + case ETHTOOL_SSET: + return ethtool_set_settings(dev, useraddr); + case ETHTOOL_GDRVINFO: + return ethtool_get_drvinfo(dev, useraddr); + case ETHTOOL_GREGS: + return ethtool_get_regs(dev, useraddr); + case ETHTOOL_GWOL: + return ethtool_get_wol(dev, useraddr); + case ETHTOOL_SWOL: + return ethtool_set_wol(dev, useraddr); + case ETHTOOL_GMSGLVL: + return ethtool_get_msglevel(dev, useraddr); + case ETHTOOL_SMSGLVL: + return ethtool_set_msglevel(dev, useraddr); + case ETHTOOL_NWAY_RST: + return ethtool_nway_reset(dev); + case ETHTOOL_GLINK: + return ethtool_get_link(dev, useraddr); + case ETHTOOL_GEEPROM: + return ethtool_get_eeprom(dev, useraddr); + case ETHTOOL_SEEPROM: + return ethtool_set_eeprom(dev, useraddr); + case ETHTOOL_GCOALESCE: + return ethtool_get_coalesce(dev, useraddr); + case ETHTOOL_SCOALESCE: + return ethtool_set_coalesce(dev, useraddr); + case ETHTOOL_GRINGPARAM: + return ethtool_get_ringparam(dev, useraddr); + case ETHTOOL_SRINGPARAM: + return ethtool_set_ringparam(dev, useraddr); + case ETHTOOL_GPAUSEPARAM: + return ethtool_get_pauseparam(dev, useraddr); + case ETHTOOL_SPAUSEPARAM: + return ethtool_set_pauseparam(dev, useraddr); + case ETHTOOL_GRXCSUM: + return ethtool_get_rx_csum(dev, useraddr); + case ETHTOOL_SRXCSUM: + return ethtool_set_rx_csum(dev, useraddr); + case ETHTOOL_GTXCSUM: + return ethtool_get_tx_csum(dev, useraddr); + case ETHTOOL_STXCSUM: + return ethtool_set_tx_csum(dev, useraddr); + case ETHTOOL_GSG: + return ethtool_get_sg(dev, useraddr); + case ETHTOOL_SSG: + return ethtool_set_sg(dev, useraddr); + case ETHTOOL_GTSO: + return ethtool_get_tso(dev, useraddr); + case ETHTOOL_STSO: + return ethtool_set_tso(dev, useraddr); + case ETHTOOL_TEST: + return ethtool_self_test(dev, useraddr); + case ETHTOOL_GSTRINGS: + return ethtool_get_strings(dev, useraddr); + case ETHTOOL_PHYS_ID: + return ethtool_phys_id(dev, useraddr); + case ETHTOOL_GSTATS: + return ethtool_get_stats(dev, useraddr); + default: + return -EOPNOTSUPP; + } + + return -EOPNOTSUPP; +} + +#define mii_if_info _kc_mii_if_info +struct _kc_mii_if_info { + int phy_id; + int advertising; + int phy_id_mask; + int reg_num_mask; + + unsigned int full_duplex : 1; /* is full duplex? */ + unsigned int force_media : 1; /* is autoneg. disabled? */ + + struct net_device *dev; + int (*mdio_read) (struct net_device *dev, int phy_id, int location); + void (*mdio_write) (struct net_device *dev, int phy_id, int location, int val); +}; + +struct ethtool_cmd; +struct mii_ioctl_data; + +#undef mii_link_ok +#define mii_link_ok _kc_mii_link_ok +#undef mii_nway_restart +#define mii_nway_restart _kc_mii_nway_restart +#undef mii_ethtool_gset +#define mii_ethtool_gset _kc_mii_ethtool_gset +#undef mii_ethtool_sset +#define mii_ethtool_sset _kc_mii_ethtool_sset +#undef mii_check_link +#define mii_check_link _kc_mii_check_link +extern int _kc_mii_link_ok (struct mii_if_info *mii); +extern int _kc_mii_nway_restart (struct mii_if_info *mii); +extern int _kc_mii_ethtool_gset(struct mii_if_info *mii, + struct ethtool_cmd *ecmd); +extern int _kc_mii_ethtool_sset(struct mii_if_info *mii, + struct ethtool_cmd *ecmd); +extern void _kc_mii_check_link (struct mii_if_info *mii); +#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,4,6) ) +#undef generic_mii_ioctl +#define generic_mii_ioctl _kc_generic_mii_ioctl +extern int _kc_generic_mii_ioctl(struct mii_if_info *mii_if, + struct mii_ioctl_data *mii_data, int cmd, + unsigned int *duplex_changed); +#endif /* > 2.4.6 */ + + +struct _kc_pci_dev_ext { + struct pci_dev *dev; + void *pci_drvdata; + struct pci_driver *driver; +}; + +struct _kc_net_dev_ext { + struct net_device *dev; + unsigned int carrier; +}; + + +/**************************************/ +/* mii support */ + +int _kc_mii_ethtool_gset(struct mii_if_info *mii, struct ethtool_cmd *ecmd) +{ + struct net_device *dev = mii->dev; + u32 advert, bmcr, lpa, nego; + + ecmd->supported = + (SUPPORTED_10baseT_Half | SUPPORTED_10baseT_Full | + SUPPORTED_100baseT_Half | SUPPORTED_100baseT_Full | + SUPPORTED_Autoneg | SUPPORTED_TP | SUPPORTED_MII); + + /* only supports twisted-pair */ + ecmd->port = PORT_MII; + + /* only supports internal transceiver */ + ecmd->transceiver = XCVR_INTERNAL; + + /* this isn't fully supported at higher layers */ + ecmd->phy_address = mii->phy_id; + + ecmd->advertising = ADVERTISED_TP | ADVERTISED_MII; + advert = mii->mdio_read(dev, mii->phy_id, MII_ADVERTISE); + if (advert & ADVERTISE_10HALF) + ecmd->advertising |= ADVERTISED_10baseT_Half; + if (advert & ADVERTISE_10FULL) + ecmd->advertising |= ADVERTISED_10baseT_Full; + if (advert & ADVERTISE_100HALF) + ecmd->advertising |= ADVERTISED_100baseT_Half; + if (advert & ADVERTISE_100FULL) + ecmd->advertising |= ADVERTISED_100baseT_Full; + + bmcr = mii->mdio_read(dev, mii->phy_id, MII_BMCR); + lpa = mii->mdio_read(dev, mii->phy_id, MII_LPA); + if (bmcr & BMCR_ANENABLE) { + ecmd->advertising |= ADVERTISED_Autoneg; + ecmd->autoneg = AUTONEG_ENABLE; + + nego = mii_nway_result(advert & lpa); + if (nego == LPA_100FULL || nego == LPA_100HALF) + ecmd->speed = SPEED_100; + else + ecmd->speed = SPEED_10; + if (nego == LPA_100FULL || nego == LPA_10FULL) { + ecmd->duplex = DUPLEX_FULL; + mii->full_duplex = 1; + } else { + ecmd->duplex = DUPLEX_HALF; + mii->full_duplex = 0; + } + } else { + ecmd->autoneg = AUTONEG_DISABLE; + + ecmd->speed = (bmcr & BMCR_SPEED100) ? SPEED_100 : SPEED_10; + ecmd->duplex = (bmcr & BMCR_FULLDPLX) ? DUPLEX_FULL : DUPLEX_HALF; + } + + /* ignore maxtxpkt, maxrxpkt for now */ + + return 0; +} + +int _kc_mii_ethtool_sset(struct mii_if_info *mii, struct ethtool_cmd *ecmd) +{ + struct net_device *dev = mii->dev; + + if (ecmd->speed != SPEED_10 && ecmd->speed != SPEED_100) + return -EINVAL; + if (ecmd->duplex != DUPLEX_HALF && ecmd->duplex != DUPLEX_FULL) + return -EINVAL; + if (ecmd->port != PORT_MII) + return -EINVAL; + if (ecmd->transceiver != XCVR_INTERNAL) + return -EINVAL; + if (ecmd->phy_address != mii->phy_id) + return -EINVAL; + if (ecmd->autoneg != AUTONEG_DISABLE && ecmd->autoneg != AUTONEG_ENABLE) + return -EINVAL; + + /* ignore supported, maxtxpkt, maxrxpkt */ + + if (ecmd->autoneg == AUTONEG_ENABLE) { + u32 bmcr, advert, tmp; + + if ((ecmd->advertising & (ADVERTISED_10baseT_Half | + ADVERTISED_10baseT_Full | + ADVERTISED_100baseT_Half | + ADVERTISED_100baseT_Full)) == 0) + return -EINVAL; + + /* advertise only what has been requested */ + advert = mii->mdio_read(dev, mii->phy_id, MII_ADVERTISE); + tmp = advert & ~(ADVERTISE_ALL | ADVERTISE_100BASE4); + if (ADVERTISED_10baseT_Half) + tmp |= ADVERTISE_10HALF; + if (ADVERTISED_10baseT_Full) + tmp |= ADVERTISE_10FULL; + if (ADVERTISED_100baseT_Half) + tmp |= ADVERTISE_100HALF; + if (ADVERTISED_100baseT_Full) + tmp |= ADVERTISE_100FULL; + if (advert != tmp) { + mii->mdio_write(dev, mii->phy_id, MII_ADVERTISE, tmp); + mii->advertising = tmp; + } + + /* turn on autonegotiation, and force a renegotiate */ + bmcr = mii->mdio_read(dev, mii->phy_id, MII_BMCR); + bmcr |= (BMCR_ANENABLE | BMCR_ANRESTART); + mii->mdio_write(dev, mii->phy_id, MII_BMCR, bmcr); + + mii->force_media = 0; + } else { + u32 bmcr, tmp; + + /* turn off auto negotiation, set speed and duplexity */ + bmcr = mii->mdio_read(dev, mii->phy_id, MII_BMCR); + tmp = bmcr & ~(BMCR_ANENABLE | BMCR_SPEED100 | BMCR_FULLDPLX); + if (ecmd->speed == SPEED_100) + tmp |= BMCR_SPEED100; + if (ecmd->duplex == DUPLEX_FULL) { + tmp |= BMCR_FULLDPLX; + mii->full_duplex = 1; + } else + mii->full_duplex = 0; + if (bmcr != tmp) + mii->mdio_write(dev, mii->phy_id, MII_BMCR, tmp); + + mii->force_media = 1; + } + return 0; +} + +int _kc_mii_link_ok (struct mii_if_info *mii) +{ + /* first, a dummy read, needed to latch some MII phys */ + mii->mdio_read(mii->dev, mii->phy_id, MII_BMSR); + if (mii->mdio_read(mii->dev, mii->phy_id, MII_BMSR) & BMSR_LSTATUS) + return 1; + return 0; +} + +int _kc_mii_nway_restart (struct mii_if_info *mii) +{ + int bmcr; + int r = -EINVAL; + + /* if autoneg is off, it's an error */ + bmcr = mii->mdio_read(mii->dev, mii->phy_id, MII_BMCR); + + if (bmcr & BMCR_ANENABLE) { + bmcr |= BMCR_ANRESTART; + mii->mdio_write(mii->dev, mii->phy_id, MII_BMCR, bmcr); + r = 0; + } + + return r; +} + +void _kc_mii_check_link (struct mii_if_info *mii) +{ + int cur_link = mii_link_ok(mii); + int prev_link = netif_carrier_ok(mii->dev); + + if (cur_link && !prev_link) + netif_carrier_on(mii->dev); + else if (prev_link && !cur_link) + netif_carrier_off(mii->dev); +} + +#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,4,6) ) +int _kc_generic_mii_ioctl(struct mii_if_info *mii_if, + struct mii_ioctl_data *mii_data, int cmd, + unsigned int *duplex_chg_out) +{ + int rc = 0; + unsigned int duplex_changed = 0; + + if (duplex_chg_out) + *duplex_chg_out = 0; + + mii_data->phy_id &= mii_if->phy_id_mask; + mii_data->reg_num &= mii_if->reg_num_mask; + + switch(cmd) { + case SIOCDEVPRIVATE: /* binary compat, remove in 2.5 */ + case SIOCGMIIPHY: + mii_data->phy_id = mii_if->phy_id; + /* fall through */ + + case SIOCDEVPRIVATE + 1:/* binary compat, remove in 2.5 */ + case SIOCGMIIREG: + mii_data->val_out = + mii_if->mdio_read(mii_if->dev, mii_data->phy_id, + mii_data->reg_num); + break; + + case SIOCDEVPRIVATE + 2:/* binary compat, remove in 2.5 */ + case SIOCSMIIREG: { + u16 val = mii_data->val_in; + + if (!capable(CAP_NET_ADMIN)) + return -EPERM; + + if (mii_data->phy_id == mii_if->phy_id) { + switch(mii_data->reg_num) { + case MII_BMCR: { + unsigned int new_duplex = 0; + if (val & (BMCR_RESET|BMCR_ANENABLE)) + mii_if->force_media = 0; + else + mii_if->force_media = 1; + if (mii_if->force_media && + (val & BMCR_FULLDPLX)) + new_duplex = 1; + if (mii_if->full_duplex != new_duplex) { + duplex_changed = 1; + mii_if->full_duplex = new_duplex; + } + break; + } + case MII_ADVERTISE: + mii_if->advertising = val; + break; + default: + /* do nothing */ + break; + } + } + + mii_if->mdio_write(mii_if->dev, mii_data->phy_id, + mii_data->reg_num, val); + break; + } + + default: + rc = -EOPNOTSUPP; + break; + } + + if ((rc == 0) && (duplex_chg_out) && (duplex_changed)) + *duplex_chg_out = 1; + + return rc; +} +#endif /* > 2.4.6 */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/COPYING b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/COPYING new file mode 100644 index 00000000..5f297e5b --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/COPYING @@ -0,0 +1,339 @@ + +"This software program is licensed subject to the GNU General Public License +(GPL). Version 2, June 1991, available at +" + +GNU General Public License + +Version 2, June 1991 + +Copyright (C) 1989, 1991 Free Software Foundation, Inc. +59 Temple Place - Suite 330, Boston, MA 02111-1307, USA + +Everyone is permitted to copy and distribute verbatim copies of this license +document, but changing it is not allowed. + +Preamble + +The licenses for most software are designed to take away your freedom to +share and change it. By contrast, the GNU General Public License is intended +to guarantee your freedom to share and change free software--to make sure +the software is free for all its users. This General Public License applies +to most of the Free Software Foundation's software and to any other program +whose authors commit to using it. (Some other Free Software Foundation +software is covered by the GNU Library General Public License instead.) You +can apply it to your programs, too. + +When we speak of free software, we are referring to freedom, not price. Our +General Public Licenses are designed to make sure that you have the freedom +to distribute copies of free software (and charge for this service if you +wish), that you receive source code or can get it if you want it, that you +can change the software or use pieces of it in new free programs; and that +you know you can do these things. + +To protect your rights, we need to make restrictions that forbid anyone to +deny you these rights or to ask you to surrender the rights. These +restrictions translate to certain responsibilities for you if you distribute +copies of the software, or if you modify it. + +For example, if you distribute copies of such a program, whether gratis or +for a fee, you must give the recipients all the rights that you have. You +must make sure that they, too, receive or can get the source code. And you +must show them these terms so they know their rights. + +We protect your rights with two steps: (1) copyright the software, and (2) +offer you this license which gives you legal permission to copy, distribute +and/or modify the software. + +Also, for each author's protection and ours, we want to make certain that +everyone understands that there is no warranty for this free software. If +the software is modified by someone else and passed on, we want its +recipients to know that what they have is not the original, so that any +problems introduced by others will not reflect on the original authors' +reputations. + +Finally, any free program is threatened constantly by software patents. We +wish to avoid the danger that redistributors of a free program will +individually obtain patent licenses, in effect making the program +proprietary. To prevent this, we have made it clear that any patent must be +licensed for everyone's free use or not licensed at all. + +The precise terms and conditions for copying, distribution and modification +follow. + +TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION + +0. This License applies to any program or other work which contains a notice + placed by the copyright holder saying it may be distributed under the + terms of this General Public License. The "Program", below, refers to any + such program or work, and a "work based on the Program" means either the + Program or any derivative work under copyright law: that is to say, a + work containing the Program or a portion of it, either verbatim or with + modifications and/or translated into another language. (Hereinafter, + translation is included without limitation in the term "modification".) + Each licensee is addressed as "you". + + Activities other than copying, distribution and modification are not + covered by this License; they are outside its scope. The act of running + the Program is not restricted, and the output from the Program is covered + only if its contents constitute a work based on the Program (independent + of having been made by running the Program). Whether that is true depends + on what the Program does. + +1. You may copy and distribute verbatim copies of the Program's source code + as you receive it, in any medium, provided that you conspicuously and + appropriately publish on each copy an appropriate copyright notice and + disclaimer of warranty; keep intact all the notices that refer to this + License and to the absence of any warranty; and give any other recipients + of the Program a copy of this License along with the Program. + + You may charge a fee for the physical act of transferring a copy, and you + may at your option offer warranty protection in exchange for a fee. + +2. You may modify your copy or copies of the Program or any portion of it, + thus forming a work based on the Program, and copy and distribute such + modifications or work under the terms of Section 1 above, provided that + you also meet all of these conditions: + + * a) You must cause the modified files to carry prominent notices stating + that you changed the files and the date of any change. + + * b) You must cause any work that you distribute or publish, that in + whole or in part contains or is derived from the Program or any part + thereof, to be licensed as a whole at no charge to all third parties + under the terms of this License. + + * c) If the modified program normally reads commands interactively when + run, you must cause it, when started running for such interactive + use in the most ordinary way, to print or display an announcement + including an appropriate copyright notice and a notice that there is + no warranty (or else, saying that you provide a warranty) and that + users may redistribute the program under these conditions, and + telling the user how to view a copy of this License. (Exception: if + the Program itself is interactive but does not normally print such + an announcement, your work based on the Program is not required to + print an announcement.) + + These requirements apply to the modified work as a whole. If identifiable + sections of that work are not derived from the Program, and can be + reasonably considered independent and separate works in themselves, then + this License, and its terms, do not apply to those sections when you + distribute them as separate works. But when you distribute the same + sections as part of a whole which is a work based on the Program, the + distribution of the whole must be on the terms of this License, whose + permissions for other licensees extend to the entire whole, and thus to + each and every part regardless of who wrote it. + + Thus, it is not the intent of this section to claim rights or contest + your rights to work written entirely by you; rather, the intent is to + exercise the right to control the distribution of derivative or + collective works based on the Program. + + In addition, mere aggregation of another work not based on the Program + with the Program (or with a work based on the Program) on a volume of a + storage or distribution medium does not bring the other work under the + scope of this License. + +3. You may copy and distribute the Program (or a work based on it, under + Section 2) in object code or executable form under the terms of Sections + 1 and 2 above provided that you also do one of the following: + + * a) Accompany it with the complete corresponding machine-readable source + code, which must be distributed under the terms of Sections 1 and 2 + above on a medium customarily used for software interchange; or, + + * b) Accompany it with a written offer, valid for at least three years, + to give any third party, for a charge no more than your cost of + physically performing source distribution, a complete machine- + readable copy of the corresponding source code, to be distributed + under the terms of Sections 1 and 2 above on a medium customarily + used for software interchange; or, + + * c) Accompany it with the information you received as to the offer to + distribute corresponding source code. (This alternative is allowed + only for noncommercial distribution and only if you received the + program in object code or executable form with such an offer, in + accord with Subsection b above.) + + The source code for a work means the preferred form of the work for + making modifications to it. For an executable work, complete source code + means all the source code for all modules it contains, plus any + associated interface definition files, plus the scripts used to control + compilation and installation of the executable. However, as a special + exception, the source code distributed need not include anything that is + normally distributed (in either source or binary form) with the major + components (compiler, kernel, and so on) of the operating system on which + the executable runs, unless that component itself accompanies the + executable. + + If distribution of executable or object code is made by offering access + to copy from a designated place, then offering equivalent access to copy + the source code from the same place counts as distribution of the source + code, even though third parties are not compelled to copy the source + along with the object code. + +4. You may not copy, modify, sublicense, or distribute the Program except as + expressly provided under this License. Any attempt otherwise to copy, + modify, sublicense or distribute the Program is void, and will + automatically terminate your rights under this License. However, parties + who have received copies, or rights, from you under this License will not + have their licenses terminated so long as such parties remain in full + compliance. + +5. You are not required to accept this License, since you have not signed + it. However, nothing else grants you permission to modify or distribute + the Program or its derivative works. These actions are prohibited by law + if you do not accept this License. Therefore, by modifying or + distributing the Program (or any work based on the Program), you + indicate your acceptance of this License to do so, and all its terms and + conditions for copying, distributing or modifying the Program or works + based on it. + +6. Each time you redistribute the Program (or any work based on the + Program), the recipient automatically receives a license from the + original licensor to copy, distribute or modify the Program subject to + these terms and conditions. You may not impose any further restrictions + on the recipients' exercise of the rights granted herein. You are not + responsible for enforcing compliance by third parties to this License. + +7. If, as a consequence of a court judgment or allegation of patent + infringement or for any other reason (not limited to patent issues), + conditions are imposed on you (whether by court order, agreement or + otherwise) that contradict the conditions of this License, they do not + excuse you from the conditions of this License. If you cannot distribute + so as to satisfy simultaneously your obligations under this License and + any other pertinent obligations, then as a consequence you may not + distribute the Program at all. For example, if a patent license would + not permit royalty-free redistribution of the Program by all those who + receive copies directly or indirectly through you, then the only way you + could satisfy both it and this License would be to refrain entirely from + distribution of the Program. + + If any portion of this section is held invalid or unenforceable under any + particular circumstance, the balance of the section is intended to apply + and the section as a whole is intended to apply in other circumstances. + + It is not the purpose of this section to induce you to infringe any + patents or other property right claims or to contest validity of any + such claims; this section has the sole purpose of protecting the + integrity of the free software distribution system, which is implemented + by public license practices. Many people have made generous contributions + to the wide range of software distributed through that system in + reliance on consistent application of that system; it is up to the + author/donor to decide if he or she is willing to distribute software + through any other system and a licensee cannot impose that choice. + + This section is intended to make thoroughly clear what is believed to be + a consequence of the rest of this License. + +8. If the distribution and/or use of the Program is restricted in certain + countries either by patents or by copyrighted interfaces, the original + copyright holder who places the Program under this License may add an + explicit geographical distribution limitation excluding those countries, + so that distribution is permitted only in or among countries not thus + excluded. In such case, this License incorporates the limitation as if + written in the body of this License. + +9. The Free Software Foundation may publish revised and/or new versions of + the General Public License from time to time. Such new versions will be + similar in spirit to the present version, but may differ in detail to + address new problems or concerns. + + Each version is given a distinguishing version number. If the Program + specifies a version number of this License which applies to it and "any + later version", you have the option of following the terms and + conditions either of that version or of any later version published by + the Free Software Foundation. If the Program does not specify a version + number of this License, you may choose any version ever published by the + Free Software Foundation. + +10. If you wish to incorporate parts of the Program into other free programs + whose distribution conditions are different, write to the author to ask + for permission. For software which is copyrighted by the Free Software + Foundation, write to the Free Software Foundation; we sometimes make + exceptions for this. Our decision will be guided by the two goals of + preserving the free status of all derivatives of our free software and + of promoting the sharing and reuse of software generally. + + NO WARRANTY + +11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY + FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN + OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES + PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER + EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE + ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH + YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL + NECESSARY SERVICING, REPAIR OR CORRECTION. + +12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING + WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR + REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR + DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL + DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM + (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED + INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF + THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR + OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +END OF TERMS AND CONDITIONS + +How to Apply These Terms to Your New Programs + +If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it free +software which everyone can redistribute and change under these terms. + +To do so, attach the following notices to the program. It is safest to +attach them to the start of each source file to most effectively convey the +exclusion of warranty; and each file should have at least the "copyright" +line and a pointer to where the full notice is found. + +one line to give the program's name and an idea of what it does. +Copyright (C) yyyy name of author + +This program is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2 of the License, or (at your option) +any later version. + +This program is distributed in the hope that it will be useful, but WITHOUT +ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for +more details. + +You should have received a copy of the GNU General Public License along with +this program; if not, write to the Free Software Foundation, Inc., 59 +Temple Place - Suite 330, Boston, MA 02111-1307, USA. + +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this when +it starts in an interactive mode: + +Gnomovision version 69, Copyright (C) year name of author Gnomovision comes +with ABSOLUTELY NO WARRANTY; for details type 'show w'. This is free +software, and you are welcome to redistribute it under certain conditions; +type 'show c' for details. + +The hypothetical commands 'show w' and 'show c' should show the appropriate +parts of the General Public License. Of course, the commands you use may be +called something other than 'show w' and 'show c'; they could even be +mouse-clicks or menu items--whatever suits your program. + +You should also get your employer (if you work as a programmer) or your +school, if any, to sign a "copyright disclaimer" for the program, if +necessary. Here is a sample; alter the names: + +Yoyodyne, Inc., hereby disclaims all copyright interest in the program +'Gnomovision' (which makes passes at compilers) written by James Hacker. + +signature of Ty Coon, 1 April 1989 +Ty Coon, President of Vice + +This General Public License does not permit incorporating your program into +proprietary programs. If your program is a subroutine library, you may +consider it more useful to permit linking proprietary applications with the +library. If this is what you want to do, use the GNU Library General Public +License instead of this License. diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h new file mode 100644 index 00000000..222c2c71 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe.h @@ -0,0 +1,925 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _IXGBE_H_ +#define _IXGBE_H_ + +#ifndef IXGBE_NO_LRO +#include +#endif + +#include +#include +#ifdef HAVE_IRQ_AFFINITY_HINT +#include +#endif /* HAVE_IRQ_AFFINITY_HINT */ +#include + +#ifdef SIOCETHTOOL +#include +#endif +#ifdef NETIF_F_HW_VLAN_TX +#include +#endif +#if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE) +#define IXGBE_DCA +#include +#endif +#include "ixgbe_dcb.h" + +#include "kcompat.h" + +#ifdef HAVE_SCTP +#include +#endif + +#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) +#define IXGBE_FCOE +#include "ixgbe_fcoe.h" +#endif /* CONFIG_FCOE or CONFIG_FCOE_MODULE */ + +#if defined(CONFIG_PTP_1588_CLOCK) || defined(CONFIG_PTP_1588_CLOCK_MODULE) +#define HAVE_IXGBE_PTP +#endif + +#include "ixgbe_api.h" + +#define PFX "ixgbe: " +#define DPRINTK(nlevel, klevel, fmt, args...) \ + ((void)((NETIF_MSG_##nlevel & adapter->msg_enable) && \ + printk(KERN_##klevel PFX "%s: %s: " fmt, adapter->netdev->name, \ + __func__ , ## args))) + +/* TX/RX descriptor defines */ +#define IXGBE_DEFAULT_TXD 512 +#define IXGBE_DEFAULT_TX_WORK 256 +#define IXGBE_MAX_TXD 4096 +#define IXGBE_MIN_TXD 64 + +#define IXGBE_DEFAULT_RXD 512 +#define IXGBE_DEFAULT_RX_WORK 256 +#define IXGBE_MAX_RXD 4096 +#define IXGBE_MIN_RXD 64 + + +/* flow control */ +#define IXGBE_MIN_FCRTL 0x40 +#define IXGBE_MAX_FCRTL 0x7FF80 +#define IXGBE_MIN_FCRTH 0x600 +#define IXGBE_MAX_FCRTH 0x7FFF0 +#define IXGBE_DEFAULT_FCPAUSE 0xFFFF +#define IXGBE_MIN_FCPAUSE 0 +#define IXGBE_MAX_FCPAUSE 0xFFFF + +/* Supported Rx Buffer Sizes */ +#define IXGBE_RXBUFFER_512 512 /* Used for packet split */ +#ifdef CONFIG_IXGBE_DISABLE_PACKET_SPLIT +#define IXGBE_RXBUFFER_1536 1536 +#define IXGBE_RXBUFFER_2K 2048 +#define IXGBE_RXBUFFER_3K 3072 +#define IXGBE_RXBUFFER_4K 4096 +#define IXGBE_RXBUFFER_7K 7168 +#define IXGBE_RXBUFFER_8K 8192 +#define IXGBE_RXBUFFER_15K 15360 +#endif /* CONFIG_IXGBE_DISABLE_PACKET_SPLIT */ +#define IXGBE_MAX_RXBUFFER 16384 /* largest size for single descriptor */ + +/* + * NOTE: netdev_alloc_skb reserves up to 64 bytes, NET_IP_ALIGN mans we + * reserve 2 more, and skb_shared_info adds an additional 384 bytes more, + * this adds up to 512 bytes of extra data meaning the smallest allocation + * we could have is 1K. + * i.e. RXBUFFER_512 --> size-1024 slab + */ +#define IXGBE_RX_HDR_SIZE IXGBE_RXBUFFER_512 + +#define MAXIMUM_ETHERNET_VLAN_SIZE (VLAN_ETH_FRAME_LEN + ETH_FCS_LEN) + +/* How many Rx Buffers do we bundle into one write to the hardware ? */ +#define IXGBE_RX_BUFFER_WRITE 16 /* Must be power of 2 */ + +#define IXGBE_TX_FLAGS_CSUM (u32)(1) +#define IXGBE_TX_FLAGS_HW_VLAN (u32)(1 << 1) +#define IXGBE_TX_FLAGS_SW_VLAN (u32)(1 << 2) +#define IXGBE_TX_FLAGS_TSO (u32)(1 << 3) +#define IXGBE_TX_FLAGS_IPV4 (u32)(1 << 4) +#define IXGBE_TX_FLAGS_FCOE (u32)(1 << 5) +#define IXGBE_TX_FLAGS_FSO (u32)(1 << 6) +#define IXGBE_TX_FLAGS_TXSW (u32)(1 << 7) +#define IXGBE_TX_FLAGS_TSTAMP (u32)(1 << 8) +#define IXGBE_TX_FLAGS_VLAN_MASK 0xffff0000 +#define IXGBE_TX_FLAGS_VLAN_PRIO_MASK 0xe0000000 +#define IXGBE_TX_FLAGS_VLAN_PRIO_SHIFT 29 +#define IXGBE_TX_FLAGS_VLAN_SHIFT 16 + +#define IXGBE_MAX_RX_DESC_POLL 10 + +#define IXGBE_MAX_VF_MC_ENTRIES 30 +#define IXGBE_MAX_VF_FUNCTIONS 64 +#define IXGBE_MAX_VFTA_ENTRIES 128 +#define MAX_EMULATION_MAC_ADDRS 16 +#define IXGBE_MAX_PF_MACVLANS 15 +#define IXGBE_82599_VF_DEVICE_ID 0x10ED +#define IXGBE_X540_VF_DEVICE_ID 0x1515 + +#ifdef CONFIG_PCI_IOV +#define VMDQ_P(p) ((p) + adapter->num_vfs) +#else +#define VMDQ_P(p) (p) +#endif + +#define UPDATE_VF_COUNTER_32bit(reg, last_counter, counter) \ + { \ + u32 current_counter = IXGBE_READ_REG(hw, reg); \ + if (current_counter < last_counter) \ + counter += 0x100000000LL; \ + last_counter = current_counter; \ + counter &= 0xFFFFFFFF00000000LL; \ + counter |= current_counter; \ + } + +#define UPDATE_VF_COUNTER_36bit(reg_lsb, reg_msb, last_counter, counter) \ + { \ + u64 current_counter_lsb = IXGBE_READ_REG(hw, reg_lsb); \ + u64 current_counter_msb = IXGBE_READ_REG(hw, reg_msb); \ + u64 current_counter = (current_counter_msb << 32) | \ + current_counter_lsb; \ + if (current_counter < last_counter) \ + counter += 0x1000000000LL; \ + last_counter = current_counter; \ + counter &= 0xFFFFFFF000000000LL; \ + counter |= current_counter; \ + } + +struct vf_stats { + u64 gprc; + u64 gorc; + u64 gptc; + u64 gotc; + u64 mprc; +}; + +struct vf_data_storage { + unsigned char vf_mac_addresses[ETH_ALEN]; + u16 vf_mc_hashes[IXGBE_MAX_VF_MC_ENTRIES]; + u16 num_vf_mc_hashes; + u16 default_vf_vlan_id; + u16 vlans_enabled; + bool clear_to_send; + struct vf_stats vfstats; + struct vf_stats last_vfstats; + struct vf_stats saved_rst_vfstats; + bool pf_set_mac; + u16 pf_vlan; /* When set, guest VLAN config not allowed. */ + u16 pf_qos; + u16 tx_rate; + u16 vlan_count; + u8 spoofchk_enabled; + struct pci_dev *vfdev; +}; + +struct vf_macvlans { + struct list_head l; + int vf; + bool free; + bool is_macvlan; + u8 vf_macvlan[ETH_ALEN]; +}; + +#ifndef IXGBE_NO_LRO +#define IXGBE_LRO_MAX 32 /*Maximum number of LRO descriptors*/ +#define IXGBE_LRO_GLOBAL 10 + +struct ixgbe_lro_stats { + u32 flushed; + u32 coal; +}; + +/* + * ixgbe_lro_header - header format to be aggregated by LRO + * @iph: IP header without options + * @tcp: TCP header + * @ts: Optional TCP timestamp data in TCP options + * + * This structure relies on the check above that verifies that the header + * is IPv4 and does not contain any options. + */ +struct ixgbe_lrohdr { + struct iphdr iph; + struct tcphdr th; + __be32 ts[0]; +}; + +struct ixgbe_lro_list { + struct sk_buff_head active; + struct ixgbe_lro_stats stats; +}; + +#endif /* IXGBE_NO_LRO */ +#define IXGBE_MAX_TXD_PWR 14 +#define IXGBE_MAX_DATA_PER_TXD (1 << IXGBE_MAX_TXD_PWR) + +/* Tx Descriptors needed, worst case */ +#define TXD_USE_COUNT(S) DIV_ROUND_UP((S), IXGBE_MAX_DATA_PER_TXD) +#ifdef MAX_SKB_FRAGS +#define DESC_NEEDED ((MAX_SKB_FRAGS * TXD_USE_COUNT(PAGE_SIZE)) + 4) +#else +#define DESC_NEEDED 4 +#endif + +/* wrapper around a pointer to a socket buffer, + * so a DMA handle can be stored along with the buffer */ +struct ixgbe_tx_buffer { + union ixgbe_adv_tx_desc *next_to_watch; + unsigned long time_stamp; + struct sk_buff *skb; + unsigned int bytecount; + unsigned short gso_segs; + __be16 protocol; + DEFINE_DMA_UNMAP_ADDR(dma); + DEFINE_DMA_UNMAP_LEN(len); + u32 tx_flags; +}; + +struct ixgbe_rx_buffer { + struct sk_buff *skb; + dma_addr_t dma; +#ifndef CONFIG_IXGBE_DISABLE_PACKET_SPLIT + struct page *page; + unsigned int page_offset; +#endif +}; + +struct ixgbe_queue_stats { + u64 packets; + u64 bytes; +}; + +struct ixgbe_tx_queue_stats { + u64 restart_queue; + u64 tx_busy; + u64 tx_done_old; +}; + +struct ixgbe_rx_queue_stats { + u64 rsc_count; + u64 rsc_flush; + u64 non_eop_descs; + u64 alloc_rx_page_failed; + u64 alloc_rx_buff_failed; + u64 csum_err; +}; + +enum ixgbe_ring_state_t { + __IXGBE_TX_FDIR_INIT_DONE, + __IXGBE_TX_DETECT_HANG, + __IXGBE_HANG_CHECK_ARMED, + __IXGBE_RX_RSC_ENABLED, +#ifndef HAVE_NDO_SET_FEATURES + __IXGBE_RX_CSUM_ENABLED, +#endif + __IXGBE_RX_CSUM_UDP_ZERO_ERR, +#ifdef IXGBE_FCOE + __IXGBE_RX_FCOE_BUFSZ, +#endif +}; + +#define check_for_tx_hang(ring) \ + test_bit(__IXGBE_TX_DETECT_HANG, &(ring)->state) +#define set_check_for_tx_hang(ring) \ + set_bit(__IXGBE_TX_DETECT_HANG, &(ring)->state) +#define clear_check_for_tx_hang(ring) \ + clear_bit(__IXGBE_TX_DETECT_HANG, &(ring)->state) +#ifndef IXGBE_NO_HW_RSC +#define ring_is_rsc_enabled(ring) \ + test_bit(__IXGBE_RX_RSC_ENABLED, &(ring)->state) +#else +#define ring_is_rsc_enabled(ring) false +#endif +#define set_ring_rsc_enabled(ring) \ + set_bit(__IXGBE_RX_RSC_ENABLED, &(ring)->state) +#define clear_ring_rsc_enabled(ring) \ + clear_bit(__IXGBE_RX_RSC_ENABLED, &(ring)->state) +#define netdev_ring(ring) (ring->netdev) +#define ring_queue_index(ring) (ring->queue_index) + + +struct ixgbe_ring { + struct ixgbe_ring *next; /* pointer to next ring in q_vector */ + struct ixgbe_q_vector *q_vector; /* backpointer to host q_vector */ + struct net_device *netdev; /* netdev ring belongs to */ + struct device *dev; /* device for DMA mapping */ + void *desc; /* descriptor ring memory */ + union { + struct ixgbe_tx_buffer *tx_buffer_info; + struct ixgbe_rx_buffer *rx_buffer_info; + }; + unsigned long state; + u8 __iomem *tail; + dma_addr_t dma; /* phys. address of descriptor ring */ + unsigned int size; /* length in bytes */ + + u16 count; /* amount of descriptors */ + + u8 queue_index; /* needed for multiqueue queue management */ + u8 reg_idx; /* holds the special value that gets + * the hardware register offset + * associated with this ring, which is + * different for DCB and RSS modes + */ + u16 next_to_use; + u16 next_to_clean; + + union { +#ifdef CONFIG_IXGBE_DISABLE_PACKET_SPLIT + u16 rx_buf_len; +#else + u16 next_to_alloc; +#endif + struct { + u8 atr_sample_rate; + u8 atr_count; + }; + }; + + u8 dcb_tc; + struct ixgbe_queue_stats stats; + union { + struct ixgbe_tx_queue_stats tx_stats; + struct ixgbe_rx_queue_stats rx_stats; + }; +} ____cacheline_internodealigned_in_smp; + +enum ixgbe_ring_f_enum { + RING_F_NONE = 0, + RING_F_VMDQ, /* SR-IOV uses the same ring feature */ + RING_F_RSS, + RING_F_FDIR, +#ifdef IXGBE_FCOE + RING_F_FCOE, +#endif /* IXGBE_FCOE */ + RING_F_ARRAY_SIZE /* must be last in enum set */ +}; + +#define IXGBE_MAX_DCB_INDICES 8 +#define IXGBE_MAX_RSS_INDICES 16 +#define IXGBE_MAX_VMDQ_INDICES 64 +#define IXGBE_MAX_FDIR_INDICES 64 +#ifdef IXGBE_FCOE +#define IXGBE_MAX_FCOE_INDICES 8 +#define MAX_RX_QUEUES (IXGBE_MAX_FDIR_INDICES + IXGBE_MAX_FCOE_INDICES) +#define MAX_TX_QUEUES (IXGBE_MAX_FDIR_INDICES + IXGBE_MAX_FCOE_INDICES) +#else +#define MAX_RX_QUEUES IXGBE_MAX_FDIR_INDICES +#define MAX_TX_QUEUES IXGBE_MAX_FDIR_INDICES +#endif /* IXGBE_FCOE */ +struct ixgbe_ring_feature { + int indices; + int mask; +}; + +#ifndef CONFIG_IXGBE_DISABLE_PACKET_SPLIT +/* + * FCoE requires that all Rx buffers be over 2200 bytes in length. Since + * this is twice the size of a half page we need to double the page order + * for FCoE enabled Rx queues. + */ +#if defined(IXGBE_FCOE) && (PAGE_SIZE < 8192) +static inline unsigned int ixgbe_rx_pg_order(struct ixgbe_ring *ring) +{ + return test_bit(__IXGBE_RX_FCOE_BUFSZ, &ring->state) ? 1 : 0; +} +#else +#define ixgbe_rx_pg_order(_ring) 0 +#endif +#define ixgbe_rx_pg_size(_ring) (PAGE_SIZE << ixgbe_rx_pg_order(_ring)) +#define ixgbe_rx_bufsz(_ring) ((PAGE_SIZE / 2) << ixgbe_rx_pg_order(_ring)) + +#endif +struct ixgbe_ring_container { + struct ixgbe_ring *ring; /* pointer to linked list of rings */ + unsigned int total_bytes; /* total bytes processed this int */ + unsigned int total_packets; /* total packets processed this int */ + u16 work_limit; /* total work allowed per interrupt */ + u8 count; /* total number of rings in vector */ + u8 itr; /* current ITR setting for ring */ +}; + +/* iterator for handling rings in ring container */ +#define ixgbe_for_each_ring(pos, head) \ + for (pos = (head).ring; pos != NULL; pos = pos->next) + +#define MAX_RX_PACKET_BUFFERS ((adapter->flags & IXGBE_FLAG_DCB_ENABLED) \ + ? 8 : 1) +#define MAX_TX_PACKET_BUFFERS MAX_RX_PACKET_BUFFERS + +/* MAX_MSIX_Q_VECTORS of these are allocated, + * but we only use one per queue-specific vector. + */ +struct ixgbe_q_vector { + struct ixgbe_adapter *adapter; + int cpu; /* CPU for DCA */ + u16 v_idx; /* index of q_vector within array, also used for + * finding the bit in EICR and friends that + * represents the vector for this ring */ + u16 itr; /* Interrupt throttle rate written to EITR */ + struct ixgbe_ring_container rx, tx; + +#ifdef CONFIG_IXGBE_NAPI + struct napi_struct napi; +#endif +#ifndef HAVE_NETDEV_NAPI_LIST + struct net_device poll_dev; +#endif +#ifdef HAVE_IRQ_AFFINITY_HINT + cpumask_t affinity_mask; +#endif +#ifndef IXGBE_NO_LRO + struct ixgbe_lro_list lrolist; /* LRO list for queue vector*/ +#endif + int numa_node; + char name[IFNAMSIZ + 9]; + + /* for dynamic allocation of rings associated with this q_vector */ + struct ixgbe_ring ring[0] ____cacheline_internodealigned_in_smp; +}; + +/* + * microsecond values for various ITR rates shifted by 2 to fit itr register + * with the first 3 bits reserved 0 + */ +#define IXGBE_MIN_RSC_ITR 24 +#define IXGBE_100K_ITR 40 +#define IXGBE_20K_ITR 200 +#define IXGBE_16K_ITR 248 +#define IXGBE_10K_ITR 400 +#define IXGBE_8K_ITR 500 + +/* ixgbe_test_staterr - tests bits in Rx descriptor status and error fields */ +static inline __le32 ixgbe_test_staterr(union ixgbe_adv_rx_desc *rx_desc, + const u32 stat_err_bits) +{ + return rx_desc->wb.upper.status_error & cpu_to_le32(stat_err_bits); +} + +/* ixgbe_desc_unused - calculate if we have unused descriptors */ +static inline u16 ixgbe_desc_unused(struct ixgbe_ring *ring) +{ + u16 ntc = ring->next_to_clean; + u16 ntu = ring->next_to_use; + + return ((ntc > ntu) ? 0 : ring->count) + ntc - ntu - 1; +} + +#define IXGBE_RX_DESC(R, i) \ + (&(((union ixgbe_adv_rx_desc *)((R)->desc))[i])) +#define IXGBE_TX_DESC(R, i) \ + (&(((union ixgbe_adv_tx_desc *)((R)->desc))[i])) +#define IXGBE_TX_CTXTDESC(R, i) \ + (&(((struct ixgbe_adv_tx_context_desc *)((R)->desc))[i])) + +#define IXGBE_MAX_JUMBO_FRAME_SIZE 16128 +#ifdef IXGBE_FCOE +/* use 3K as the baby jumbo frame size for FCoE */ +#define IXGBE_FCOE_JUMBO_FRAME_SIZE 3072 +#endif /* IXGBE_FCOE */ + +#define TCP_TIMER_VECTOR 0 +#define OTHER_VECTOR 1 +#define NON_Q_VECTORS (OTHER_VECTOR + TCP_TIMER_VECTOR) + +#define IXGBE_MAX_MSIX_Q_VECTORS_82599 64 +#define IXGBE_MAX_MSIX_Q_VECTORS_82598 16 + +struct ixgbe_mac_addr { + u8 addr[ETH_ALEN]; + u16 queue; + u16 state; /* bitmask */ +}; +#define IXGBE_MAC_STATE_DEFAULT 0x1 +#define IXGBE_MAC_STATE_MODIFIED 0x2 +#define IXGBE_MAC_STATE_IN_USE 0x4 + +#ifdef IXGBE_PROCFS +struct ixgbe_therm_proc_data { + struct ixgbe_hw *hw; + struct ixgbe_thermal_diode_data *sensor_data; +}; + +#endif /* IXGBE_PROCFS */ + +/* + * Only for array allocations in our adapter struct. On 82598, there will be + * unused entries in the array, but that's not a big deal. Also, in 82599, + * we can actually assign 64 queue vectors based on our extended-extended + * interrupt registers. This is different than 82598, which is limited to 16. + */ +#define MAX_MSIX_Q_VECTORS IXGBE_MAX_MSIX_Q_VECTORS_82599 +#define MAX_MSIX_COUNT IXGBE_MAX_MSIX_VECTORS_82599 + +#define MIN_MSIX_Q_VECTORS 1 +#define MIN_MSIX_COUNT (MIN_MSIX_Q_VECTORS + NON_Q_VECTORS) + +/* default to trying for four seconds */ +#define IXGBE_TRY_LINK_TIMEOUT (4 * HZ) + +/* board specific private data structure */ +struct ixgbe_adapter { +#ifdef NETIF_F_HW_VLAN_TX +#ifdef HAVE_VLAN_RX_REGISTER + struct vlan_group *vlgrp; /* must be first, see ixgbe_receive_skb */ +#else + unsigned long active_vlans[BITS_TO_LONGS(VLAN_N_VID)]; +#endif +#endif /* NETIF_F_HW_VLAN_TX */ + /* OS defined structs */ + struct net_device *netdev; + struct pci_dev *pdev; + + unsigned long state; + + /* Some features need tri-state capability, + * thus the additional *_CAPABLE flags. + */ + u32 flags; +#define IXGBE_FLAG_MSI_CAPABLE (u32)(1 << 0) +#define IXGBE_FLAG_MSI_ENABLED (u32)(1 << 1) +#define IXGBE_FLAG_MSIX_CAPABLE (u32)(1 << 2) +#define IXGBE_FLAG_MSIX_ENABLED (u32)(1 << 3) +#ifndef IXGBE_NO_LLI +#define IXGBE_FLAG_LLI_PUSH (u32)(1 << 4) +#endif +#define IXGBE_FLAG_IN_NETPOLL (u32)(1 << 8) +#if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE) +#define IXGBE_FLAG_DCA_ENABLED (u32)(1 << 9) +#define IXGBE_FLAG_DCA_CAPABLE (u32)(1 << 10) +#define IXGBE_FLAG_DCA_ENABLED_DATA (u32)(1 << 11) +#else +#define IXGBE_FLAG_DCA_ENABLED (u32)0 +#define IXGBE_FLAG_DCA_CAPABLE (u32)0 +#define IXGBE_FLAG_DCA_ENABLED_DATA (u32)0 +#endif +#define IXGBE_FLAG_MQ_CAPABLE (u32)(1 << 12) +#define IXGBE_FLAG_DCB_ENABLED (u32)(1 << 13) +#define IXGBE_FLAG_DCB_CAPABLE (u32)(1 << 14) +#define IXGBE_FLAG_RSS_ENABLED (u32)(1 << 15) +#define IXGBE_FLAG_RSS_CAPABLE (u32)(1 << 16) +#define IXGBE_FLAG_VMDQ_ENABLED (u32)(1 << 18) +#define IXGBE_FLAG_FAN_FAIL_CAPABLE (u32)(1 << 19) +#define IXGBE_FLAG_NEED_LINK_UPDATE (u32)(1 << 20) +#define IXGBE_FLAG_NEED_LINK_CONFIG (u32)(1 << 21) +#define IXGBE_FLAG_FDIR_HASH_CAPABLE (u32)(1 << 22) +#define IXGBE_FLAG_FDIR_PERFECT_CAPABLE (u32)(1 << 23) +#ifdef IXGBE_FCOE +#define IXGBE_FLAG_FCOE_CAPABLE (u32)(1 << 24) +#define IXGBE_FLAG_FCOE_ENABLED (u32)(1 << 25) +#endif /* IXGBE_FCOE */ +#define IXGBE_FLAG_SRIOV_CAPABLE (u32)(1 << 26) +#define IXGBE_FLAG_SRIOV_ENABLED (u32)(1 << 27) +#define IXGBE_FLAG_SRIOV_REPLICATION_ENABLE (u32)(1 << 28) +#define IXGBE_FLAG_SRIOV_L2SWITCH_ENABLE (u32)(1 << 29) +#define IXGBE_FLAG_SRIOV_L2LOOPBACK_ENABLE (u32)(1 << 30) +#define IXGBE_FLAG_RX_BB_CAPABLE (u32)(1 << 31) + + u32 flags2; +#ifndef IXGBE_NO_HW_RSC +#define IXGBE_FLAG2_RSC_CAPABLE (u32)(1) +#define IXGBE_FLAG2_RSC_ENABLED (u32)(1 << 1) +#else +#define IXGBE_FLAG2_RSC_CAPABLE 0 +#define IXGBE_FLAG2_RSC_ENABLED 0 +#endif +#define IXGBE_FLAG2_VMDQ_DEFAULT_OVERRIDE (u32)(1 << 2) +#define IXGBE_FLAG2_TEMP_SENSOR_CAPABLE (u32)(1 << 4) +#define IXGBE_FLAG2_TEMP_SENSOR_EVENT (u32)(1 << 5) +#define IXGBE_FLAG2_SEARCH_FOR_SFP (u32)(1 << 6) +#define IXGBE_FLAG2_SFP_NEEDS_RESET (u32)(1 << 7) +#define IXGBE_FLAG2_RESET_REQUESTED (u32)(1 << 8) +#define IXGBE_FLAG2_FDIR_REQUIRES_REINIT (u32)(1 << 9) +#define IXGBE_FLAG2_RSS_FIELD_IPV4_UDP (u32)(1 << 10) +#define IXGBE_FLAG2_RSS_FIELD_IPV6_UDP (u32)(1 << 11) +#define IXGBE_FLAG2_OVERFLOW_CHECK_ENABLED (u32)(1 << 12) + + /* Tx fast path data */ + int num_tx_queues; + u16 tx_itr_setting; + u16 tx_work_limit; + + /* Rx fast path data */ + int num_rx_queues; + u16 rx_itr_setting; + u16 rx_work_limit; + + /* TX */ + struct ixgbe_ring *tx_ring[MAX_TX_QUEUES] ____cacheline_aligned_in_smp; + + u64 restart_queue; + u64 lsc_int; + u32 tx_timeout_count; + + /* RX */ + struct ixgbe_ring *rx_ring[MAX_RX_QUEUES]; + int num_rx_pools; /* == num_rx_queues in 82598 */ + int num_rx_queues_per_pool; /* 1 if 82598, can be many if 82599 */ + u64 hw_csum_rx_error; + u64 hw_rx_no_dma_resources; + u64 rsc_total_count; + u64 rsc_total_flush; + u64 non_eop_descs; +#ifndef CONFIG_IXGBE_NAPI + u64 rx_dropped_backlog; /* count drops from rx intr handler */ +#endif + u32 alloc_rx_page_failed; + u32 alloc_rx_buff_failed; + + struct ixgbe_q_vector *q_vector[MAX_MSIX_Q_VECTORS]; + +#ifdef HAVE_DCBNL_IEEE + struct ieee_pfc *ixgbe_ieee_pfc; + struct ieee_ets *ixgbe_ieee_ets; +#endif + struct ixgbe_dcb_config dcb_cfg; + struct ixgbe_dcb_config temp_dcb_cfg; + u8 dcb_set_bitmap; + u8 dcbx_cap; +#ifndef HAVE_MQPRIO + u8 tc; +#endif + enum ixgbe_fc_mode last_lfc_mode; + + int num_msix_vectors; + int max_msix_q_vectors; /* true count of q_vectors for device */ + struct ixgbe_ring_feature ring_feature[RING_F_ARRAY_SIZE]; + struct msix_entry *msix_entries; + +#ifndef HAVE_NETDEV_STATS_IN_NETDEV + struct net_device_stats net_stats; +#endif +#ifndef IXGBE_NO_LRO + struct ixgbe_lro_stats lro_stats; +#endif + +#ifdef ETHTOOL_TEST + u32 test_icr; + struct ixgbe_ring test_tx_ring; + struct ixgbe_ring test_rx_ring; +#endif + + /* structs defined in ixgbe_hw.h */ + struct ixgbe_hw hw; + u16 msg_enable; + struct ixgbe_hw_stats stats; +#ifndef IXGBE_NO_LLI + u32 lli_port; + u32 lli_size; + u32 lli_etype; + u32 lli_vlan_pri; +#endif /* IXGBE_NO_LLI */ + + u32 *config_space; + u64 tx_busy; + unsigned int tx_ring_count; + unsigned int rx_ring_count; + + u32 link_speed; + bool link_up; + unsigned long link_check_timeout; + + struct timer_list service_timer; + struct work_struct service_task; + + struct hlist_head fdir_filter_list; + unsigned long fdir_overflow; /* number of times ATR was backed off */ + union ixgbe_atr_input fdir_mask; + int fdir_filter_count; + u32 fdir_pballoc; + u32 atr_sample_rate; + spinlock_t fdir_perfect_lock; + +#ifdef IXGBE_FCOE + struct ixgbe_fcoe fcoe; +#endif /* IXGBE_FCOE */ + u32 wol; + + u16 bd_number; + + char eeprom_id[32]; + u16 eeprom_cap; + bool netdev_registered; + u32 interrupt_event; +#ifdef HAVE_ETHTOOL_SET_PHYS_ID + u32 led_reg; +#endif + + DECLARE_BITMAP(active_vfs, IXGBE_MAX_VF_FUNCTIONS); + unsigned int num_vfs; + struct vf_data_storage *vfinfo; + int vf_rate_link_speed; + struct vf_macvlans vf_mvs; + struct vf_macvlans *mv_list; +#ifdef CONFIG_PCI_IOV + u32 timer_event_accumulator; + u32 vferr_refcount; +#endif + struct ixgbe_mac_addr *mac_table; +#ifdef IXGBE_SYSFS + struct kobject *info_kobj; + struct kobject *therm_kobj[IXGBE_MAX_SENSORS]; +#else /* IXGBE_SYSFS */ +#ifdef IXGBE_PROCFS + struct proc_dir_entry *eth_dir; + struct proc_dir_entry *info_dir; + struct proc_dir_entry *therm_dir[IXGBE_MAX_SENSORS]; + struct ixgbe_therm_proc_data therm_data[IXGBE_MAX_SENSORS]; +#endif /* IXGBE_PROCFS */ +#endif /* IXGBE_SYSFS */ +}; + +struct ixgbe_fdir_filter { + struct hlist_node fdir_node; + union ixgbe_atr_input filter; + u16 sw_idx; + u16 action; +}; + +enum ixgbe_state_t { + __IXGBE_TESTING, + __IXGBE_RESETTING, + __IXGBE_DOWN, + __IXGBE_SERVICE_SCHED, + __IXGBE_IN_SFP_INIT, +}; + +struct ixgbe_cb { +#ifdef CONFIG_IXGBE_DISABLE_PACKET_SPLIT + union { /* Union defining head/tail partner */ + struct sk_buff *head; + struct sk_buff *tail; + }; +#endif + dma_addr_t dma; +#ifndef IXGBE_NO_LRO + __be32 tsecr; /* timestamp echo response */ + u32 tsval; /* timestamp value in host order */ + u32 next_seq; /* next expected sequence number */ + u16 free; /* 65521 minus total size */ + u16 mss; /* size of data portion of packet */ +#endif /* IXGBE_NO_LRO */ +#ifdef HAVE_VLAN_RX_REGISTER + u16 vid; /* VLAN tag */ +#endif + u16 append_cnt; /* number of skb's appended */ +#ifndef CONFIG_IXGBE_DISABLE_PACKET_SPLIT + bool page_released; +#endif +}; +#define IXGBE_CB(skb) ((struct ixgbe_cb *)(skb)->cb) + +#ifdef IXGBE_SYSFS +void ixgbe_sysfs_exit(struct ixgbe_adapter *adapter); +int ixgbe_sysfs_init(struct ixgbe_adapter *adapter); +#endif /* IXGBE_SYSFS */ +#ifdef IXGBE_PROCFS +void ixgbe_procfs_exit(struct ixgbe_adapter *adapter); +int ixgbe_procfs_init(struct ixgbe_adapter *adapter); +int ixgbe_procfs_topdir_init(void); +void ixgbe_procfs_topdir_exit(void); +#endif /* IXGBE_PROCFS */ + +extern struct dcbnl_rtnl_ops dcbnl_ops; +extern int ixgbe_copy_dcb_cfg(struct ixgbe_adapter *adapter, int tc_max); + +extern u8 ixgbe_dcb_txq_to_tc(struct ixgbe_adapter *adapter, u8 index); + +/* needed by ixgbe_main.c */ +extern int ixgbe_validate_mac_addr(u8 *mc_addr); +extern void ixgbe_check_options(struct ixgbe_adapter *adapter); +extern void ixgbe_assign_netdev_ops(struct net_device *netdev); + +/* needed by ixgbe_ethtool.c */ +extern char ixgbe_driver_name[]; +extern const char ixgbe_driver_version[]; + +extern void ixgbe_up(struct ixgbe_adapter *adapter); +extern void ixgbe_down(struct ixgbe_adapter *adapter); +extern void ixgbe_reinit_locked(struct ixgbe_adapter *adapter); +extern void ixgbe_reset(struct ixgbe_adapter *adapter); +extern void ixgbe_set_ethtool_ops(struct net_device *netdev); +extern int ixgbe_setup_rx_resources(struct ixgbe_ring *); +extern int ixgbe_setup_tx_resources(struct ixgbe_ring *); +extern void ixgbe_free_rx_resources(struct ixgbe_ring *); +extern void ixgbe_free_tx_resources(struct ixgbe_ring *); +extern void ixgbe_configure_rx_ring(struct ixgbe_adapter *, + struct ixgbe_ring *); +extern void ixgbe_configure_tx_ring(struct ixgbe_adapter *, + struct ixgbe_ring *); +extern void ixgbe_update_stats(struct ixgbe_adapter *adapter); +extern int ixgbe_init_interrupt_scheme(struct ixgbe_adapter *adapter); +extern void ixgbe_clear_interrupt_scheme(struct ixgbe_adapter *adapter); +extern bool ixgbe_is_ixgbe(struct pci_dev *pcidev); +extern netdev_tx_t ixgbe_xmit_frame_ring(struct sk_buff *, + struct ixgbe_adapter *, + struct ixgbe_ring *); +extern void ixgbe_unmap_and_free_tx_resource(struct ixgbe_ring *, + struct ixgbe_tx_buffer *); +extern void ixgbe_alloc_rx_buffers(struct ixgbe_ring *, u16); +extern void ixgbe_configure_rscctl(struct ixgbe_adapter *adapter, + struct ixgbe_ring *); +extern void ixgbe_clear_rscctl(struct ixgbe_adapter *adapter, + struct ixgbe_ring *); +extern void ixgbe_set_rx_mode(struct net_device *netdev); +extern int ixgbe_write_mc_addr_list(struct net_device *netdev); +extern int ixgbe_setup_tc(struct net_device *dev, u8 tc); +#ifdef IXGBE_FCOE +extern void ixgbe_tx_ctxtdesc(struct ixgbe_ring *, u32, u32, u32, u32); +#endif /* IXGBE_FCOE */ +extern void ixgbe_do_reset(struct net_device *netdev); +extern void ixgbe_write_eitr(struct ixgbe_q_vector *q_vector); +extern void ixgbe_disable_rx_queue(struct ixgbe_adapter *adapter, + struct ixgbe_ring *); +extern void ixgbe_vlan_stripping_enable(struct ixgbe_adapter *adapter); +extern void ixgbe_vlan_stripping_disable(struct ixgbe_adapter *adapter); +#ifdef ETHTOOL_OPS_COMPAT +extern int ethtool_ioctl(struct ifreq *ifr); +#endif + +#ifdef IXGBE_FCOE +extern void ixgbe_configure_fcoe(struct ixgbe_adapter *adapter); +extern int ixgbe_fso(struct ixgbe_ring *tx_ring, + struct ixgbe_tx_buffer *first, + u8 *hdr_len); +extern void ixgbe_cleanup_fcoe(struct ixgbe_adapter *adapter); +extern int ixgbe_fcoe_ddp(struct ixgbe_adapter *adapter, + union ixgbe_adv_rx_desc *rx_desc, + struct sk_buff *skb); +extern int ixgbe_fcoe_ddp_get(struct net_device *netdev, u16 xid, + struct scatterlist *sgl, unsigned int sgc); +#ifdef HAVE_NETDEV_OPS_FCOE_DDP_TARGET +extern int ixgbe_fcoe_ddp_target(struct net_device *netdev, u16 xid, + struct scatterlist *sgl, unsigned int sgc); +#endif /* HAVE_NETDEV_OPS_FCOE_DDP_TARGET */ +extern int ixgbe_fcoe_ddp_put(struct net_device *netdev, u16 xid); +#ifdef HAVE_NETDEV_OPS_FCOE_ENABLE +extern int ixgbe_fcoe_enable(struct net_device *netdev); +extern int ixgbe_fcoe_disable(struct net_device *netdev); +#endif /* HAVE_NETDEV_OPS_FCOE_ENABLE */ +#ifdef CONFIG_DCB +#ifdef HAVE_DCBNL_OPS_GETAPP +extern u8 ixgbe_fcoe_getapp(struct net_device *netdev); +#endif /* HAVE_DCBNL_OPS_GETAPP */ +extern u8 ixgbe_fcoe_setapp(struct ixgbe_adapter *adapter, u8 up); +#endif /* CONFIG_DCB */ +#ifdef HAVE_NETDEV_OPS_FCOE_GETWWN +extern int ixgbe_fcoe_get_wwn(struct net_device *netdev, u64 *wwn, int type); +#endif +#endif /* IXGBE_FCOE */ + +#ifdef CONFIG_DCB +#ifdef HAVE_DCBNL_IEEE +s32 ixgbe_dcb_hw_ets(struct ixgbe_hw *hw, struct ieee_ets *ets, int max_frame); +#endif /* HAVE_DCBNL_IEEE */ +#endif /* CONFIG_DCB */ + +extern void ixgbe_clean_rx_ring(struct ixgbe_ring *rx_ring); +extern int ixgbe_get_settings(struct net_device *netdev, + struct ethtool_cmd *ecmd); +extern int ixgbe_write_uc_addr_list(struct ixgbe_adapter *adapter, + struct net_device *netdev, unsigned int vfn); +extern void ixgbe_full_sync_mac_table(struct ixgbe_adapter *adapter); +extern int ixgbe_add_mac_filter(struct ixgbe_adapter *adapter, + u8 *addr, u16 queue); +extern int ixgbe_del_mac_filter(struct ixgbe_adapter *adapter, + u8 *addr, u16 queue); +extern int ixgbe_available_rars(struct ixgbe_adapter *adapter); +#ifndef HAVE_VLAN_RX_REGISTER +extern void ixgbe_vlan_mode(struct net_device *, u32); +#endif +#ifndef ixgbe_get_netdev_tc_txq +#define ixgbe_get_netdev_tc_txq(dev, tc) (&dev->tc_to_txq[tc]) +#endif +extern void ixgbe_set_rx_drop_en(struct ixgbe_adapter *adapter); +#endif /* _IXGBE_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c new file mode 100644 index 00000000..24015844 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.c @@ -0,0 +1,1296 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#include "ixgbe_type.h" +#include "ixgbe_82598.h" +#include "ixgbe_api.h" +#include "ixgbe_common.h" +#include "ixgbe_phy.h" + +static s32 ixgbe_get_link_capabilities_82598(struct ixgbe_hw *hw, + ixgbe_link_speed *speed, + bool *autoneg); +static enum ixgbe_media_type ixgbe_get_media_type_82598(struct ixgbe_hw *hw); +static s32 ixgbe_start_mac_link_82598(struct ixgbe_hw *hw, + bool autoneg_wait_to_complete); +static s32 ixgbe_check_mac_link_82598(struct ixgbe_hw *hw, + ixgbe_link_speed *speed, bool *link_up, + bool link_up_wait_to_complete); +static s32 ixgbe_setup_mac_link_82598(struct ixgbe_hw *hw, + ixgbe_link_speed speed, + bool autoneg, + bool autoneg_wait_to_complete); +static s32 ixgbe_setup_copper_link_82598(struct ixgbe_hw *hw, + ixgbe_link_speed speed, + bool autoneg, + bool autoneg_wait_to_complete); +static s32 ixgbe_reset_hw_82598(struct ixgbe_hw *hw); +static s32 ixgbe_clear_vmdq_82598(struct ixgbe_hw *hw, u32 rar, u32 vmdq); +static s32 ixgbe_clear_vfta_82598(struct ixgbe_hw *hw); +static void ixgbe_set_rxpba_82598(struct ixgbe_hw *hw, int num_pb, + u32 headroom, int strategy); + +/** + * ixgbe_set_pcie_completion_timeout - set pci-e completion timeout + * @hw: pointer to the HW structure + * + * The defaults for 82598 should be in the range of 50us to 50ms, + * however the hardware default for these parts is 500us to 1ms which is less + * than the 10ms recommended by the pci-e spec. To address this we need to + * increase the value to either 10ms to 250ms for capability version 1 config, + * or 16ms to 55ms for version 2. + **/ +void ixgbe_set_pcie_completion_timeout(struct ixgbe_hw *hw) +{ + u32 gcr = IXGBE_READ_REG(hw, IXGBE_GCR); + u16 pcie_devctl2; + + /* only take action if timeout value is defaulted to 0 */ + if (gcr & IXGBE_GCR_CMPL_TMOUT_MASK) + goto out; + + /* + * if capababilities version is type 1 we can write the + * timeout of 10ms to 250ms through the GCR register + */ + if (!(gcr & IXGBE_GCR_CAP_VER2)) { + gcr |= IXGBE_GCR_CMPL_TMOUT_10ms; + goto out; + } + + /* + * for version 2 capabilities we need to write the config space + * directly in order to set the completion timeout value for + * 16ms to 55ms + */ + pcie_devctl2 = IXGBE_READ_PCIE_WORD(hw, IXGBE_PCI_DEVICE_CONTROL2); + pcie_devctl2 |= IXGBE_PCI_DEVICE_CONTROL2_16ms; + IXGBE_WRITE_PCIE_WORD(hw, IXGBE_PCI_DEVICE_CONTROL2, pcie_devctl2); +out: + /* disable completion timeout resend */ + gcr &= ~IXGBE_GCR_CMPL_TMOUT_RESEND; + IXGBE_WRITE_REG(hw, IXGBE_GCR, gcr); +} + +/** + * ixgbe_init_ops_82598 - Inits func ptrs and MAC type + * @hw: pointer to hardware structure + * + * Initialize the function pointers and assign the MAC type for 82598. + * Does not touch the hardware. + **/ +s32 ixgbe_init_ops_82598(struct ixgbe_hw *hw) +{ + struct ixgbe_mac_info *mac = &hw->mac; + struct ixgbe_phy_info *phy = &hw->phy; + s32 ret_val; + + ret_val = ixgbe_init_phy_ops_generic(hw); + ret_val = ixgbe_init_ops_generic(hw); + + /* PHY */ + phy->ops.init = &ixgbe_init_phy_ops_82598; + + /* MAC */ + mac->ops.start_hw = &ixgbe_start_hw_82598; + mac->ops.reset_hw = &ixgbe_reset_hw_82598; + mac->ops.get_media_type = &ixgbe_get_media_type_82598; + mac->ops.get_supported_physical_layer = + &ixgbe_get_supported_physical_layer_82598; + mac->ops.read_analog_reg8 = &ixgbe_read_analog_reg8_82598; + mac->ops.write_analog_reg8 = &ixgbe_write_analog_reg8_82598; + mac->ops.set_lan_id = &ixgbe_set_lan_id_multi_port_pcie_82598; + + /* RAR, Multicast, VLAN */ + mac->ops.set_vmdq = &ixgbe_set_vmdq_82598; + mac->ops.clear_vmdq = &ixgbe_clear_vmdq_82598; + mac->ops.set_vfta = &ixgbe_set_vfta_82598; + mac->ops.set_vlvf = NULL; + mac->ops.clear_vfta = &ixgbe_clear_vfta_82598; + + /* Flow Control */ + mac->ops.fc_enable = &ixgbe_fc_enable_82598; + + mac->mcft_size = 128; + mac->vft_size = 128; + mac->num_rar_entries = 16; + mac->rx_pb_size = 512; + mac->max_tx_queues = 32; + mac->max_rx_queues = 64; + mac->max_msix_vectors = ixgbe_get_pcie_msix_count_generic(hw); + + /* SFP+ Module */ + phy->ops.read_i2c_eeprom = &ixgbe_read_i2c_eeprom_82598; + + /* Link */ + mac->ops.check_link = &ixgbe_check_mac_link_82598; + mac->ops.setup_link = &ixgbe_setup_mac_link_82598; + mac->ops.flap_tx_laser = NULL; + mac->ops.get_link_capabilities = &ixgbe_get_link_capabilities_82598; + mac->ops.setup_rxpba = &ixgbe_set_rxpba_82598; + + /* Manageability interface */ + mac->ops.set_fw_drv_ver = NULL; + + return ret_val; +} + +/** + * ixgbe_init_phy_ops_82598 - PHY/SFP specific init + * @hw: pointer to hardware structure + * + * Initialize any function pointers that were not able to be + * set during init_shared_code because the PHY/SFP type was + * not known. Perform the SFP init if necessary. + * + **/ +s32 ixgbe_init_phy_ops_82598(struct ixgbe_hw *hw) +{ + struct ixgbe_mac_info *mac = &hw->mac; + struct ixgbe_phy_info *phy = &hw->phy; + s32 ret_val = 0; + u16 list_offset, data_offset; + + /* Identify the PHY */ + phy->ops.identify(hw); + + /* Overwrite the link function pointers if copper PHY */ + if (mac->ops.get_media_type(hw) == ixgbe_media_type_copper) { + mac->ops.setup_link = &ixgbe_setup_copper_link_82598; + mac->ops.get_link_capabilities = + &ixgbe_get_copper_link_capabilities_generic; + } + + switch (hw->phy.type) { + case ixgbe_phy_tn: + phy->ops.setup_link = &ixgbe_setup_phy_link_tnx; + phy->ops.check_link = &ixgbe_check_phy_link_tnx; + phy->ops.get_firmware_version = + &ixgbe_get_phy_firmware_version_tnx; + break; + case ixgbe_phy_nl: + phy->ops.reset = &ixgbe_reset_phy_nl; + + /* Call SFP+ identify routine to get the SFP+ module type */ + ret_val = phy->ops.identify_sfp(hw); + if (ret_val != 0) + goto out; + else if (hw->phy.sfp_type == ixgbe_sfp_type_unknown) { + ret_val = IXGBE_ERR_SFP_NOT_SUPPORTED; + goto out; + } + + /* Check to see if SFP+ module is supported */ + ret_val = ixgbe_get_sfp_init_sequence_offsets(hw, + &list_offset, + &data_offset); + if (ret_val != 0) { + ret_val = IXGBE_ERR_SFP_NOT_SUPPORTED; + goto out; + } + break; + default: + break; + } + +out: + return ret_val; +} + +/** + * ixgbe_start_hw_82598 - Prepare hardware for Tx/Rx + * @hw: pointer to hardware structure + * + * Starts the hardware using the generic start_hw function. + * Disables relaxed ordering Then set pcie completion timeout + * + **/ +s32 ixgbe_start_hw_82598(struct ixgbe_hw *hw) +{ + u32 regval; + u32 i; + s32 ret_val = 0; + + ret_val = ixgbe_start_hw_generic(hw); + + /* Disable relaxed ordering */ + for (i = 0; ((i < hw->mac.max_tx_queues) && + (i < IXGBE_DCA_MAX_QUEUES_82598)); i++) { + regval = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i)); + regval &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN; + IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL(i), regval); + } + + for (i = 0; ((i < hw->mac.max_rx_queues) && + (i < IXGBE_DCA_MAX_QUEUES_82598)); i++) { + regval = IXGBE_READ_REG(hw, IXGBE_DCA_RXCTRL(i)); + regval &= ~(IXGBE_DCA_RXCTRL_DATA_WRO_EN | + IXGBE_DCA_RXCTRL_HEAD_WRO_EN); + IXGBE_WRITE_REG(hw, IXGBE_DCA_RXCTRL(i), regval); + } + + /* set the completion timeout for interface */ + if (ret_val == 0) + ixgbe_set_pcie_completion_timeout(hw); + + return ret_val; +} + +/** + * ixgbe_get_link_capabilities_82598 - Determines link capabilities + * @hw: pointer to hardware structure + * @speed: pointer to link speed + * @autoneg: boolean auto-negotiation value + * + * Determines the link capabilities by reading the AUTOC register. + **/ +static s32 ixgbe_get_link_capabilities_82598(struct ixgbe_hw *hw, + ixgbe_link_speed *speed, + bool *autoneg) +{ + s32 status = 0; + u32 autoc = 0; + + /* + * Determine link capabilities based on the stored value of AUTOC, + * which represents EEPROM defaults. If AUTOC value has not been + * stored, use the current register value. + */ + if (hw->mac.orig_link_settings_stored) + autoc = hw->mac.orig_autoc; + else + autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC); + + switch (autoc & IXGBE_AUTOC_LMS_MASK) { + case IXGBE_AUTOC_LMS_1G_LINK_NO_AN: + *speed = IXGBE_LINK_SPEED_1GB_FULL; + *autoneg = false; + break; + + case IXGBE_AUTOC_LMS_10G_LINK_NO_AN: + *speed = IXGBE_LINK_SPEED_10GB_FULL; + *autoneg = false; + break; + + case IXGBE_AUTOC_LMS_1G_AN: + *speed = IXGBE_LINK_SPEED_1GB_FULL; + *autoneg = true; + break; + + case IXGBE_AUTOC_LMS_KX4_AN: + case IXGBE_AUTOC_LMS_KX4_AN_1G_AN: + *speed = IXGBE_LINK_SPEED_UNKNOWN; + if (autoc & IXGBE_AUTOC_KX4_SUPP) + *speed |= IXGBE_LINK_SPEED_10GB_FULL; + if (autoc & IXGBE_AUTOC_KX_SUPP) + *speed |= IXGBE_LINK_SPEED_1GB_FULL; + *autoneg = true; + break; + + default: + status = IXGBE_ERR_LINK_SETUP; + break; + } + + return status; +} + +/** + * ixgbe_get_media_type_82598 - Determines media type + * @hw: pointer to hardware structure + * + * Returns the media type (fiber, copper, backplane) + **/ +static enum ixgbe_media_type ixgbe_get_media_type_82598(struct ixgbe_hw *hw) +{ + enum ixgbe_media_type media_type; + + /* Detect if there is a copper PHY attached. */ + switch (hw->phy.type) { + case ixgbe_phy_cu_unknown: + case ixgbe_phy_tn: + media_type = ixgbe_media_type_copper; + goto out; + default: + break; + } + + /* Media type for I82598 is based on device ID */ + switch (hw->device_id) { + case IXGBE_DEV_ID_82598: + case IXGBE_DEV_ID_82598_BX: + /* Default device ID is mezzanine card KX/KX4 */ + media_type = ixgbe_media_type_backplane; + break; + case IXGBE_DEV_ID_82598AF_DUAL_PORT: + case IXGBE_DEV_ID_82598AF_SINGLE_PORT: + case IXGBE_DEV_ID_82598_DA_DUAL_PORT: + case IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM: + case IXGBE_DEV_ID_82598EB_XF_LR: + case IXGBE_DEV_ID_82598EB_SFP_LOM: + media_type = ixgbe_media_type_fiber; + break; + case IXGBE_DEV_ID_82598EB_CX4: + case IXGBE_DEV_ID_82598_CX4_DUAL_PORT: + media_type = ixgbe_media_type_cx4; + break; + case IXGBE_DEV_ID_82598AT: + case IXGBE_DEV_ID_82598AT2: + media_type = ixgbe_media_type_copper; + break; + default: + media_type = ixgbe_media_type_unknown; + break; + } +out: + return media_type; +} + +/** + * ixgbe_fc_enable_82598 - Enable flow control + * @hw: pointer to hardware structure + * + * Enable flow control according to the current settings. + **/ +s32 ixgbe_fc_enable_82598(struct ixgbe_hw *hw) +{ + s32 ret_val = 0; + u32 fctrl_reg; + u32 rmcs_reg; + u32 reg; + u32 fcrtl, fcrth; + u32 link_speed = 0; + int i; + bool link_up; + + /* Validate the water mark configuration */ + if (!hw->fc.pause_time) { + ret_val = IXGBE_ERR_INVALID_LINK_SETTINGS; + goto out; + } + + /* Low water mark of zero causes XOFF floods */ + for (i = 0; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) { + if ((hw->fc.current_mode & ixgbe_fc_tx_pause) && + hw->fc.high_water[i]) { + if (!hw->fc.low_water[i] || + hw->fc.low_water[i] >= hw->fc.high_water[i]) { + hw_dbg(hw, "Invalid water mark configuration\n"); + ret_val = IXGBE_ERR_INVALID_LINK_SETTINGS; + goto out; + } + } + } + + /* + * On 82598 having Rx FC on causes resets while doing 1G + * so if it's on turn it off once we know link_speed. For + * more details see 82598 Specification update. + */ + hw->mac.ops.check_link(hw, &link_speed, &link_up, false); + if (link_up && link_speed == IXGBE_LINK_SPEED_1GB_FULL) { + switch (hw->fc.requested_mode) { + case ixgbe_fc_full: + hw->fc.requested_mode = ixgbe_fc_tx_pause; + break; + case ixgbe_fc_rx_pause: + hw->fc.requested_mode = ixgbe_fc_none; + break; + default: + /* no change */ + break; + } + } + + /* Negotiate the fc mode to use */ + ixgbe_fc_autoneg(hw); + + /* Disable any previous flow control settings */ + fctrl_reg = IXGBE_READ_REG(hw, IXGBE_FCTRL); + fctrl_reg &= ~(IXGBE_FCTRL_RFCE | IXGBE_FCTRL_RPFCE); + + rmcs_reg = IXGBE_READ_REG(hw, IXGBE_RMCS); + rmcs_reg &= ~(IXGBE_RMCS_TFCE_PRIORITY | IXGBE_RMCS_TFCE_802_3X); + + /* + * The possible values of fc.current_mode are: + * 0: Flow control is completely disabled + * 1: Rx flow control is enabled (we can receive pause frames, + * but not send pause frames). + * 2: Tx flow control is enabled (we can send pause frames but + * we do not support receiving pause frames). + * 3: Both Rx and Tx flow control (symmetric) are enabled. + * other: Invalid. + */ + switch (hw->fc.current_mode) { + case ixgbe_fc_none: + /* + * Flow control is disabled by software override or autoneg. + * The code below will actually disable it in the HW. + */ + break; + case ixgbe_fc_rx_pause: + /* + * Rx Flow control is enabled and Tx Flow control is + * disabled by software override. Since there really + * isn't a way to advertise that we are capable of RX + * Pause ONLY, we will advertise that we support both + * symmetric and asymmetric Rx PAUSE. Later, we will + * disable the adapter's ability to send PAUSE frames. + */ + fctrl_reg |= IXGBE_FCTRL_RFCE; + break; + case ixgbe_fc_tx_pause: + /* + * Tx Flow control is enabled, and Rx Flow control is + * disabled by software override. + */ + rmcs_reg |= IXGBE_RMCS_TFCE_802_3X; + break; + case ixgbe_fc_full: + /* Flow control (both Rx and Tx) is enabled by SW override. */ + fctrl_reg |= IXGBE_FCTRL_RFCE; + rmcs_reg |= IXGBE_RMCS_TFCE_802_3X; + break; + default: + hw_dbg(hw, "Flow control param set incorrectly\n"); + ret_val = IXGBE_ERR_CONFIG; + goto out; + break; + } + + /* Set 802.3x based flow control settings. */ + fctrl_reg |= IXGBE_FCTRL_DPF; + IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl_reg); + IXGBE_WRITE_REG(hw, IXGBE_RMCS, rmcs_reg); + + /* Set up and enable Rx high/low water mark thresholds, enable XON. */ + for (i = 0; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) { + if ((hw->fc.current_mode & ixgbe_fc_tx_pause) && + hw->fc.high_water[i]) { + fcrtl = (hw->fc.low_water[i] << 10) | IXGBE_FCRTL_XONE; + fcrth = (hw->fc.high_water[i] << 10) | IXGBE_FCRTH_FCEN; + IXGBE_WRITE_REG(hw, IXGBE_FCRTL(i), fcrtl); + IXGBE_WRITE_REG(hw, IXGBE_FCRTH(i), fcrth); + } else { + IXGBE_WRITE_REG(hw, IXGBE_FCRTL(i), 0); + IXGBE_WRITE_REG(hw, IXGBE_FCRTH(i), 0); + } + + } + + /* Configure pause time (2 TCs per register) */ + reg = hw->fc.pause_time * 0x00010001; + for (i = 0; i < (IXGBE_DCB_MAX_TRAFFIC_CLASS / 2); i++) + IXGBE_WRITE_REG(hw, IXGBE_FCTTV(i), reg); + + /* Configure flow control refresh threshold value */ + IXGBE_WRITE_REG(hw, IXGBE_FCRTV, hw->fc.pause_time / 2); + +out: + return ret_val; +} + +/** + * ixgbe_start_mac_link_82598 - Configures MAC link settings + * @hw: pointer to hardware structure + * + * Configures link settings based on values in the ixgbe_hw struct. + * Restarts the link. Performs autonegotiation if needed. + **/ +static s32 ixgbe_start_mac_link_82598(struct ixgbe_hw *hw, + bool autoneg_wait_to_complete) +{ + u32 autoc_reg; + u32 links_reg; + u32 i; + s32 status = 0; + + /* Restart link */ + autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC); + autoc_reg |= IXGBE_AUTOC_AN_RESTART; + IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc_reg); + + /* Only poll for autoneg to complete if specified to do so */ + if (autoneg_wait_to_complete) { + if ((autoc_reg & IXGBE_AUTOC_LMS_MASK) == + IXGBE_AUTOC_LMS_KX4_AN || + (autoc_reg & IXGBE_AUTOC_LMS_MASK) == + IXGBE_AUTOC_LMS_KX4_AN_1G_AN) { + links_reg = 0; /* Just in case Autoneg time = 0 */ + for (i = 0; i < IXGBE_AUTO_NEG_TIME; i++) { + links_reg = IXGBE_READ_REG(hw, IXGBE_LINKS); + if (links_reg & IXGBE_LINKS_KX_AN_COMP) + break; + msleep(100); + } + if (!(links_reg & IXGBE_LINKS_KX_AN_COMP)) { + status = IXGBE_ERR_AUTONEG_NOT_COMPLETE; + hw_dbg(hw, "Autonegotiation did not complete.\n"); + } + } + } + + /* Add delay to filter out noises during initial link setup */ + msleep(50); + + return status; +} + +/** + * ixgbe_validate_link_ready - Function looks for phy link + * @hw: pointer to hardware structure + * + * Function indicates success when phy link is available. If phy is not ready + * within 5 seconds of MAC indicating link, the function returns error. + **/ +static s32 ixgbe_validate_link_ready(struct ixgbe_hw *hw) +{ + u32 timeout; + u16 an_reg; + + if (hw->device_id != IXGBE_DEV_ID_82598AT2) + return 0; + + for (timeout = 0; + timeout < IXGBE_VALIDATE_LINK_READY_TIMEOUT; timeout++) { + hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_STATUS, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &an_reg); + + if ((an_reg & IXGBE_MII_AUTONEG_COMPLETE) && + (an_reg & IXGBE_MII_AUTONEG_LINK_UP)) + break; + + msleep(100); + } + + if (timeout == IXGBE_VALIDATE_LINK_READY_TIMEOUT) { + hw_dbg(hw, "Link was indicated but link is down\n"); + return IXGBE_ERR_LINK_SETUP; + } + + return 0; +} + +/** + * ixgbe_check_mac_link_82598 - Get link/speed status + * @hw: pointer to hardware structure + * @speed: pointer to link speed + * @link_up: true is link is up, false otherwise + * @link_up_wait_to_complete: bool used to wait for link up or not + * + * Reads the links register to determine if link is up and the current speed + **/ +static s32 ixgbe_check_mac_link_82598(struct ixgbe_hw *hw, + ixgbe_link_speed *speed, bool *link_up, + bool link_up_wait_to_complete) +{ + u32 links_reg; + u32 i; + u16 link_reg, adapt_comp_reg; + + /* + * SERDES PHY requires us to read link status from undocumented + * register 0xC79F. Bit 0 set indicates link is up/ready; clear + * indicates link down. OxC00C is read to check that the XAUI lanes + * are active. Bit 0 clear indicates active; set indicates inactive. + */ + if (hw->phy.type == ixgbe_phy_nl) { + hw->phy.ops.read_reg(hw, 0xC79F, IXGBE_TWINAX_DEV, &link_reg); + hw->phy.ops.read_reg(hw, 0xC79F, IXGBE_TWINAX_DEV, &link_reg); + hw->phy.ops.read_reg(hw, 0xC00C, IXGBE_TWINAX_DEV, + &adapt_comp_reg); + if (link_up_wait_to_complete) { + for (i = 0; i < IXGBE_LINK_UP_TIME; i++) { + if ((link_reg & 1) && + ((adapt_comp_reg & 1) == 0)) { + *link_up = true; + break; + } else { + *link_up = false; + } + msleep(100); + hw->phy.ops.read_reg(hw, 0xC79F, + IXGBE_TWINAX_DEV, + &link_reg); + hw->phy.ops.read_reg(hw, 0xC00C, + IXGBE_TWINAX_DEV, + &adapt_comp_reg); + } + } else { + if ((link_reg & 1) && ((adapt_comp_reg & 1) == 0)) + *link_up = true; + else + *link_up = false; + } + + if (*link_up == false) + goto out; + } + + links_reg = IXGBE_READ_REG(hw, IXGBE_LINKS); + if (link_up_wait_to_complete) { + for (i = 0; i < IXGBE_LINK_UP_TIME; i++) { + if (links_reg & IXGBE_LINKS_UP) { + *link_up = true; + break; + } else { + *link_up = false; + } + msleep(100); + links_reg = IXGBE_READ_REG(hw, IXGBE_LINKS); + } + } else { + if (links_reg & IXGBE_LINKS_UP) + *link_up = true; + else + *link_up = false; + } + + if (links_reg & IXGBE_LINKS_SPEED) + *speed = IXGBE_LINK_SPEED_10GB_FULL; + else + *speed = IXGBE_LINK_SPEED_1GB_FULL; + + if ((hw->device_id == IXGBE_DEV_ID_82598AT2) && (*link_up == true) && + (ixgbe_validate_link_ready(hw) != 0)) + *link_up = false; + +out: + return 0; +} + +/** + * ixgbe_setup_mac_link_82598 - Set MAC link speed + * @hw: pointer to hardware structure + * @speed: new link speed + * @autoneg: true if autonegotiation enabled + * @autoneg_wait_to_complete: true when waiting for completion is needed + * + * Set the link speed in the AUTOC register and restarts link. + **/ +static s32 ixgbe_setup_mac_link_82598(struct ixgbe_hw *hw, + ixgbe_link_speed speed, bool autoneg, + bool autoneg_wait_to_complete) +{ + s32 status = 0; + ixgbe_link_speed link_capabilities = IXGBE_LINK_SPEED_UNKNOWN; + u32 curr_autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC); + u32 autoc = curr_autoc; + u32 link_mode = autoc & IXGBE_AUTOC_LMS_MASK; + + /* Check to see if speed passed in is supported. */ + ixgbe_get_link_capabilities(hw, &link_capabilities, &autoneg); + speed &= link_capabilities; + + if (speed == IXGBE_LINK_SPEED_UNKNOWN) + status = IXGBE_ERR_LINK_SETUP; + + /* Set KX4/KX support according to speed requested */ + else if (link_mode == IXGBE_AUTOC_LMS_KX4_AN || + link_mode == IXGBE_AUTOC_LMS_KX4_AN_1G_AN) { + autoc &= ~IXGBE_AUTOC_KX4_KX_SUPP_MASK; + if (speed & IXGBE_LINK_SPEED_10GB_FULL) + autoc |= IXGBE_AUTOC_KX4_SUPP; + if (speed & IXGBE_LINK_SPEED_1GB_FULL) + autoc |= IXGBE_AUTOC_KX_SUPP; + if (autoc != curr_autoc) + IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc); + } + + if (status == 0) { + /* + * Setup and restart the link based on the new values in + * ixgbe_hw This will write the AUTOC register based on the new + * stored values + */ + status = ixgbe_start_mac_link_82598(hw, + autoneg_wait_to_complete); + } + + return status; +} + + +/** + * ixgbe_setup_copper_link_82598 - Set the PHY autoneg advertised field + * @hw: pointer to hardware structure + * @speed: new link speed + * @autoneg: true if autonegotiation enabled + * @autoneg_wait_to_complete: true if waiting is needed to complete + * + * Sets the link speed in the AUTOC register in the MAC and restarts link. + **/ +static s32 ixgbe_setup_copper_link_82598(struct ixgbe_hw *hw, + ixgbe_link_speed speed, + bool autoneg, + bool autoneg_wait_to_complete) +{ + s32 status; + + /* Setup the PHY according to input speed */ + status = hw->phy.ops.setup_link_speed(hw, speed, autoneg, + autoneg_wait_to_complete); + /* Set up MAC */ + ixgbe_start_mac_link_82598(hw, autoneg_wait_to_complete); + + return status; +} + +/** + * ixgbe_reset_hw_82598 - Performs hardware reset + * @hw: pointer to hardware structure + * + * Resets the hardware by resetting the transmit and receive units, masks and + * clears all interrupts, performing a PHY reset, and performing a link (MAC) + * reset. + **/ +static s32 ixgbe_reset_hw_82598(struct ixgbe_hw *hw) +{ + s32 status = 0; + s32 phy_status = 0; + u32 ctrl; + u32 gheccr; + u32 i; + u32 autoc; + u8 analog_val; + + /* Call adapter stop to disable tx/rx and clear interrupts */ + status = hw->mac.ops.stop_adapter(hw); + if (status != 0) + goto reset_hw_out; + + /* + * Power up the Atlas Tx lanes if they are currently powered down. + * Atlas Tx lanes are powered down for MAC loopback tests, but + * they are not automatically restored on reset. + */ + hw->mac.ops.read_analog_reg8(hw, IXGBE_ATLAS_PDN_LPBK, &analog_val); + if (analog_val & IXGBE_ATLAS_PDN_TX_REG_EN) { + /* Enable Tx Atlas so packets can be transmitted again */ + hw->mac.ops.read_analog_reg8(hw, IXGBE_ATLAS_PDN_LPBK, + &analog_val); + analog_val &= ~IXGBE_ATLAS_PDN_TX_REG_EN; + hw->mac.ops.write_analog_reg8(hw, IXGBE_ATLAS_PDN_LPBK, + analog_val); + + hw->mac.ops.read_analog_reg8(hw, IXGBE_ATLAS_PDN_10G, + &analog_val); + analog_val &= ~IXGBE_ATLAS_PDN_TX_10G_QL_ALL; + hw->mac.ops.write_analog_reg8(hw, IXGBE_ATLAS_PDN_10G, + analog_val); + + hw->mac.ops.read_analog_reg8(hw, IXGBE_ATLAS_PDN_1G, + &analog_val); + analog_val &= ~IXGBE_ATLAS_PDN_TX_1G_QL_ALL; + hw->mac.ops.write_analog_reg8(hw, IXGBE_ATLAS_PDN_1G, + analog_val); + + hw->mac.ops.read_analog_reg8(hw, IXGBE_ATLAS_PDN_AN, + &analog_val); + analog_val &= ~IXGBE_ATLAS_PDN_TX_AN_QL_ALL; + hw->mac.ops.write_analog_reg8(hw, IXGBE_ATLAS_PDN_AN, + analog_val); + } + + /* Reset PHY */ + if (hw->phy.reset_disable == false) { + /* PHY ops must be identified and initialized prior to reset */ + + /* Init PHY and function pointers, perform SFP setup */ + phy_status = hw->phy.ops.init(hw); + if (phy_status == IXGBE_ERR_SFP_NOT_SUPPORTED) + goto reset_hw_out; + if (phy_status == IXGBE_ERR_SFP_NOT_PRESENT) + goto mac_reset_top; + + hw->phy.ops.reset(hw); + } + +mac_reset_top: + /* + * Issue global reset to the MAC. This needs to be a SW reset. + * If link reset is used, it might reset the MAC when mng is using it + */ + ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL) | IXGBE_CTRL_RST; + IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl); + IXGBE_WRITE_FLUSH(hw); + + /* Poll for reset bit to self-clear indicating reset is complete */ + for (i = 0; i < 10; i++) { + udelay(1); + ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL); + if (!(ctrl & IXGBE_CTRL_RST)) + break; + } + if (ctrl & IXGBE_CTRL_RST) { + status = IXGBE_ERR_RESET_FAILED; + hw_dbg(hw, "Reset polling failed to complete.\n"); + } + + msleep(50); + + /* + * Double resets are required for recovery from certain error + * conditions. Between resets, it is necessary to stall to allow time + * for any pending HW events to complete. + */ + if (hw->mac.flags & IXGBE_FLAGS_DOUBLE_RESET_REQUIRED) { + hw->mac.flags &= ~IXGBE_FLAGS_DOUBLE_RESET_REQUIRED; + goto mac_reset_top; + } + + gheccr = IXGBE_READ_REG(hw, IXGBE_GHECCR); + gheccr &= ~((1 << 21) | (1 << 18) | (1 << 9) | (1 << 6)); + IXGBE_WRITE_REG(hw, IXGBE_GHECCR, gheccr); + + /* + * Store the original AUTOC value if it has not been + * stored off yet. Otherwise restore the stored original + * AUTOC value since the reset operation sets back to deaults. + */ + autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC); + if (hw->mac.orig_link_settings_stored == false) { + hw->mac.orig_autoc = autoc; + hw->mac.orig_link_settings_stored = true; + } else if (autoc != hw->mac.orig_autoc) { + IXGBE_WRITE_REG(hw, IXGBE_AUTOC, hw->mac.orig_autoc); + } + + /* Store the permanent mac address */ + hw->mac.ops.get_mac_addr(hw, hw->mac.perm_addr); + + /* + * Store MAC address from RAR0, clear receive address registers, and + * clear the multicast table + */ + hw->mac.ops.init_rx_addrs(hw); + +reset_hw_out: + if (phy_status != 0) + status = phy_status; + + return status; +} + +/** + * ixgbe_set_vmdq_82598 - Associate a VMDq set index with a rx address + * @hw: pointer to hardware struct + * @rar: receive address register index to associate with a VMDq index + * @vmdq: VMDq set index + **/ +s32 ixgbe_set_vmdq_82598(struct ixgbe_hw *hw, u32 rar, u32 vmdq) +{ + u32 rar_high; + u32 rar_entries = hw->mac.num_rar_entries; + + /* Make sure we are using a valid rar index range */ + if (rar >= rar_entries) { + hw_dbg(hw, "RAR index %d is out of range.\n", rar); + return IXGBE_ERR_INVALID_ARGUMENT; + } + + rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(rar)); + rar_high &= ~IXGBE_RAH_VIND_MASK; + rar_high |= ((vmdq << IXGBE_RAH_VIND_SHIFT) & IXGBE_RAH_VIND_MASK); + IXGBE_WRITE_REG(hw, IXGBE_RAH(rar), rar_high); + return 0; +} + +/** + * ixgbe_clear_vmdq_82598 - Disassociate a VMDq set index from an rx address + * @hw: pointer to hardware struct + * @rar: receive address register index to associate with a VMDq index + * @vmdq: VMDq clear index (not used in 82598, but elsewhere) + **/ +static s32 ixgbe_clear_vmdq_82598(struct ixgbe_hw *hw, u32 rar, u32 vmdq) +{ + u32 rar_high; + u32 rar_entries = hw->mac.num_rar_entries; + + + /* Make sure we are using a valid rar index range */ + if (rar >= rar_entries) { + hw_dbg(hw, "RAR index %d is out of range.\n", rar); + return IXGBE_ERR_INVALID_ARGUMENT; + } + + rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(rar)); + if (rar_high & IXGBE_RAH_VIND_MASK) { + rar_high &= ~IXGBE_RAH_VIND_MASK; + IXGBE_WRITE_REG(hw, IXGBE_RAH(rar), rar_high); + } + + return 0; +} + +/** + * ixgbe_set_vfta_82598 - Set VLAN filter table + * @hw: pointer to hardware structure + * @vlan: VLAN id to write to VLAN filter + * @vind: VMDq output index that maps queue to VLAN id in VFTA + * @vlan_on: boolean flag to turn on/off VLAN in VFTA + * + * Turn on/off specified VLAN in the VLAN filter table. + **/ +s32 ixgbe_set_vfta_82598(struct ixgbe_hw *hw, u32 vlan, u32 vind, + bool vlan_on) +{ + u32 regindex; + u32 bitindex; + u32 bits; + u32 vftabyte; + + if (vlan > 4095) + return IXGBE_ERR_PARAM; + + /* Determine 32-bit word position in array */ + regindex = (vlan >> 5) & 0x7F; /* upper seven bits */ + + /* Determine the location of the (VMD) queue index */ + vftabyte = ((vlan >> 3) & 0x03); /* bits (4:3) indicating byte array */ + bitindex = (vlan & 0x7) << 2; /* lower 3 bits indicate nibble */ + + /* Set the nibble for VMD queue index */ + bits = IXGBE_READ_REG(hw, IXGBE_VFTAVIND(vftabyte, regindex)); + bits &= (~(0x0F << bitindex)); + bits |= (vind << bitindex); + IXGBE_WRITE_REG(hw, IXGBE_VFTAVIND(vftabyte, regindex), bits); + + /* Determine the location of the bit for this VLAN id */ + bitindex = vlan & 0x1F; /* lower five bits */ + + bits = IXGBE_READ_REG(hw, IXGBE_VFTA(regindex)); + if (vlan_on) + /* Turn on this VLAN id */ + bits |= (1 << bitindex); + else + /* Turn off this VLAN id */ + bits &= ~(1 << bitindex); + IXGBE_WRITE_REG(hw, IXGBE_VFTA(regindex), bits); + + return 0; +} + +/** + * ixgbe_clear_vfta_82598 - Clear VLAN filter table + * @hw: pointer to hardware structure + * + * Clears the VLAN filer table, and the VMDq index associated with the filter + **/ +static s32 ixgbe_clear_vfta_82598(struct ixgbe_hw *hw) +{ + u32 offset; + u32 vlanbyte; + + for (offset = 0; offset < hw->mac.vft_size; offset++) + IXGBE_WRITE_REG(hw, IXGBE_VFTA(offset), 0); + + for (vlanbyte = 0; vlanbyte < 4; vlanbyte++) + for (offset = 0; offset < hw->mac.vft_size; offset++) + IXGBE_WRITE_REG(hw, IXGBE_VFTAVIND(vlanbyte, offset), + 0); + + return 0; +} + +/** + * ixgbe_read_analog_reg8_82598 - Reads 8 bit Atlas analog register + * @hw: pointer to hardware structure + * @reg: analog register to read + * @val: read value + * + * Performs read operation to Atlas analog register specified. + **/ +s32 ixgbe_read_analog_reg8_82598(struct ixgbe_hw *hw, u32 reg, u8 *val) +{ + u32 atlas_ctl; + + IXGBE_WRITE_REG(hw, IXGBE_ATLASCTL, + IXGBE_ATLASCTL_WRITE_CMD | (reg << 8)); + IXGBE_WRITE_FLUSH(hw); + udelay(10); + atlas_ctl = IXGBE_READ_REG(hw, IXGBE_ATLASCTL); + *val = (u8)atlas_ctl; + + return 0; +} + +/** + * ixgbe_write_analog_reg8_82598 - Writes 8 bit Atlas analog register + * @hw: pointer to hardware structure + * @reg: atlas register to write + * @val: value to write + * + * Performs write operation to Atlas analog register specified. + **/ +s32 ixgbe_write_analog_reg8_82598(struct ixgbe_hw *hw, u32 reg, u8 val) +{ + u32 atlas_ctl; + + atlas_ctl = (reg << 8) | val; + IXGBE_WRITE_REG(hw, IXGBE_ATLASCTL, atlas_ctl); + IXGBE_WRITE_FLUSH(hw); + udelay(10); + + return 0; +} + +/** + * ixgbe_read_i2c_eeprom_82598 - Reads 8 bit word over I2C interface. + * @hw: pointer to hardware structure + * @byte_offset: EEPROM byte offset to read + * @eeprom_data: value read + * + * Performs 8 byte read operation to SFP module's EEPROM over I2C interface. + **/ +s32 ixgbe_read_i2c_eeprom_82598(struct ixgbe_hw *hw, u8 byte_offset, + u8 *eeprom_data) +{ + s32 status = 0; + u16 sfp_addr = 0; + u16 sfp_data = 0; + u16 sfp_stat = 0; + u32 i; + + if (hw->phy.type == ixgbe_phy_nl) { + /* + * NetLogic phy SDA/SCL registers are at addresses 0xC30A to + * 0xC30D. These registers are used to talk to the SFP+ + * module's EEPROM through the SDA/SCL (I2C) interface. + */ + sfp_addr = (IXGBE_I2C_EEPROM_DEV_ADDR << 8) + byte_offset; + sfp_addr = (sfp_addr | IXGBE_I2C_EEPROM_READ_MASK); + hw->phy.ops.write_reg(hw, + IXGBE_MDIO_PMA_PMD_SDA_SCL_ADDR, + IXGBE_MDIO_PMA_PMD_DEV_TYPE, + sfp_addr); + + /* Poll status */ + for (i = 0; i < 100; i++) { + hw->phy.ops.read_reg(hw, + IXGBE_MDIO_PMA_PMD_SDA_SCL_STAT, + IXGBE_MDIO_PMA_PMD_DEV_TYPE, + &sfp_stat); + sfp_stat = sfp_stat & IXGBE_I2C_EEPROM_STATUS_MASK; + if (sfp_stat != IXGBE_I2C_EEPROM_STATUS_IN_PROGRESS) + break; + msleep(10); + } + + if (sfp_stat != IXGBE_I2C_EEPROM_STATUS_PASS) { + hw_dbg(hw, "EEPROM read did not pass.\n"); + status = IXGBE_ERR_SFP_NOT_PRESENT; + goto out; + } + + /* Read data */ + hw->phy.ops.read_reg(hw, IXGBE_MDIO_PMA_PMD_SDA_SCL_DATA, + IXGBE_MDIO_PMA_PMD_DEV_TYPE, &sfp_data); + + *eeprom_data = (u8)(sfp_data >> 8); + } else { + status = IXGBE_ERR_PHY; + goto out; + } + +out: + return status; +} + +/** + * ixgbe_get_supported_physical_layer_82598 - Returns physical layer type + * @hw: pointer to hardware structure + * + * Determines physical layer capabilities of the current configuration. + **/ +u32 ixgbe_get_supported_physical_layer_82598(struct ixgbe_hw *hw) +{ + u32 physical_layer = IXGBE_PHYSICAL_LAYER_UNKNOWN; + u32 autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC); + u32 pma_pmd_10g = autoc & IXGBE_AUTOC_10G_PMA_PMD_MASK; + u32 pma_pmd_1g = autoc & IXGBE_AUTOC_1G_PMA_PMD_MASK; + u16 ext_ability = 0; + + hw->phy.ops.identify(hw); + + /* Copper PHY must be checked before AUTOC LMS to determine correct + * physical layer because 10GBase-T PHYs use LMS = KX4/KX */ + switch (hw->phy.type) { + case ixgbe_phy_tn: + case ixgbe_phy_cu_unknown: + hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_EXT_ABILITY, + IXGBE_MDIO_PMA_PMD_DEV_TYPE, &ext_ability); + if (ext_ability & IXGBE_MDIO_PHY_10GBASET_ABILITY) + physical_layer |= IXGBE_PHYSICAL_LAYER_10GBASE_T; + if (ext_ability & IXGBE_MDIO_PHY_1000BASET_ABILITY) + physical_layer |= IXGBE_PHYSICAL_LAYER_1000BASE_T; + if (ext_ability & IXGBE_MDIO_PHY_100BASETX_ABILITY) + physical_layer |= IXGBE_PHYSICAL_LAYER_100BASE_TX; + goto out; + default: + break; + } + + switch (autoc & IXGBE_AUTOC_LMS_MASK) { + case IXGBE_AUTOC_LMS_1G_AN: + case IXGBE_AUTOC_LMS_1G_LINK_NO_AN: + if (pma_pmd_1g == IXGBE_AUTOC_1G_KX) + physical_layer = IXGBE_PHYSICAL_LAYER_1000BASE_KX; + else + physical_layer = IXGBE_PHYSICAL_LAYER_1000BASE_BX; + break; + case IXGBE_AUTOC_LMS_10G_LINK_NO_AN: + if (pma_pmd_10g == IXGBE_AUTOC_10G_CX4) + physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_CX4; + else if (pma_pmd_10g == IXGBE_AUTOC_10G_KX4) + physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_KX4; + else /* XAUI */ + physical_layer = IXGBE_PHYSICAL_LAYER_UNKNOWN; + break; + case IXGBE_AUTOC_LMS_KX4_AN: + case IXGBE_AUTOC_LMS_KX4_AN_1G_AN: + if (autoc & IXGBE_AUTOC_KX_SUPP) + physical_layer |= IXGBE_PHYSICAL_LAYER_1000BASE_KX; + if (autoc & IXGBE_AUTOC_KX4_SUPP) + physical_layer |= IXGBE_PHYSICAL_LAYER_10GBASE_KX4; + break; + default: + break; + } + + if (hw->phy.type == ixgbe_phy_nl) { + hw->phy.ops.identify_sfp(hw); + + switch (hw->phy.sfp_type) { + case ixgbe_sfp_type_da_cu: + physical_layer = IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU; + break; + case ixgbe_sfp_type_sr: + physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_SR; + break; + case ixgbe_sfp_type_lr: + physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_LR; + break; + default: + physical_layer = IXGBE_PHYSICAL_LAYER_UNKNOWN; + break; + } + } + + switch (hw->device_id) { + case IXGBE_DEV_ID_82598_DA_DUAL_PORT: + physical_layer = IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU; + break; + case IXGBE_DEV_ID_82598AF_DUAL_PORT: + case IXGBE_DEV_ID_82598AF_SINGLE_PORT: + case IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM: + physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_SR; + break; + case IXGBE_DEV_ID_82598EB_XF_LR: + physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_LR; + break; + default: + break; + } + +out: + return physical_layer; +} + +/** + * ixgbe_set_lan_id_multi_port_pcie_82598 - Set LAN id for PCIe multiple + * port devices. + * @hw: pointer to the HW structure + * + * Calls common function and corrects issue with some single port devices + * that enable LAN1 but not LAN0. + **/ +void ixgbe_set_lan_id_multi_port_pcie_82598(struct ixgbe_hw *hw) +{ + struct ixgbe_bus_info *bus = &hw->bus; + u16 pci_gen = 0; + u16 pci_ctrl2 = 0; + + ixgbe_set_lan_id_multi_port_pcie(hw); + + /* check if LAN0 is disabled */ + hw->eeprom.ops.read(hw, IXGBE_PCIE_GENERAL_PTR, &pci_gen); + if ((pci_gen != 0) && (pci_gen != 0xFFFF)) { + + hw->eeprom.ops.read(hw, pci_gen + IXGBE_PCIE_CTRL2, &pci_ctrl2); + + /* if LAN0 is completely disabled force function to 0 */ + if ((pci_ctrl2 & IXGBE_PCIE_CTRL2_LAN_DISABLE) && + !(pci_ctrl2 & IXGBE_PCIE_CTRL2_DISABLE_SELECT) && + !(pci_ctrl2 & IXGBE_PCIE_CTRL2_DUMMY_ENABLE)) { + + bus->func = 0; + } + } +} + +/** + * ixgbe_set_rxpba_82598 - Initialize RX packet buffer + * @hw: pointer to hardware structure + * @num_pb: number of packet buffers to allocate + * @headroom: reserve n KB of headroom + * @strategy: packet buffer allocation strategy + **/ +static void ixgbe_set_rxpba_82598(struct ixgbe_hw *hw, int num_pb, + u32 headroom, int strategy) +{ + u32 rxpktsize = IXGBE_RXPBSIZE_64KB; + u8 i = 0; + + if (!num_pb) + return; + + /* Setup Rx packet buffer sizes */ + switch (strategy) { + case PBA_STRATEGY_WEIGHTED: + /* Setup the first four at 80KB */ + rxpktsize = IXGBE_RXPBSIZE_80KB; + for (; i < 4; i++) + IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpktsize); + /* Setup the last four at 48KB...don't re-init i */ + rxpktsize = IXGBE_RXPBSIZE_48KB; + /* Fall Through */ + case PBA_STRATEGY_EQUAL: + default: + /* Divide the remaining Rx packet buffer evenly among the TCs */ + for (; i < IXGBE_MAX_PACKET_BUFFERS; i++) + IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpktsize); + break; + } + + /* Setup Tx packet buffer sizes */ + for (i = 0; i < IXGBE_MAX_PACKET_BUFFERS; i++) + IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), IXGBE_TXPBSIZE_40KB); + + return; +} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h new file mode 100644 index 00000000..c6abb020 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82598.h @@ -0,0 +1,44 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _IXGBE_82598_H_ +#define _IXGBE_82598_H_ + +u32 ixgbe_get_pcie_msix_count_82598(struct ixgbe_hw *hw); +s32 ixgbe_fc_enable_82598(struct ixgbe_hw *hw); +s32 ixgbe_start_hw_82598(struct ixgbe_hw *hw); +s32 ixgbe_set_vmdq_82598(struct ixgbe_hw *hw, u32 rar, u32 vmdq); +s32 ixgbe_set_vfta_82598(struct ixgbe_hw *hw, u32 vlan, u32 vind, bool vlan_on); +s32 ixgbe_read_analog_reg8_82598(struct ixgbe_hw *hw, u32 reg, u8 *val); +s32 ixgbe_write_analog_reg8_82598(struct ixgbe_hw *hw, u32 reg, u8 val); +s32 ixgbe_read_i2c_eeprom_82598(struct ixgbe_hw *hw, u8 byte_offset, + u8 *eeprom_data); +u32 ixgbe_get_supported_physical_layer_82598(struct ixgbe_hw *hw); +s32 ixgbe_init_phy_ops_82598(struct ixgbe_hw *hw); +void ixgbe_set_lan_id_multi_port_pcie_82598(struct ixgbe_hw *hw); +void ixgbe_set_pcie_completion_timeout(struct ixgbe_hw *hw); +#endif /* _IXGBE_82598_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c new file mode 100644 index 00000000..017dfe16 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.c @@ -0,0 +1,2313 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#include "ixgbe_type.h" +#include "ixgbe_82599.h" +#include "ixgbe_api.h" +#include "ixgbe_common.h" +#include "ixgbe_phy.h" + +static s32 ixgbe_setup_copper_link_82599(struct ixgbe_hw *hw, + ixgbe_link_speed speed, + bool autoneg, + bool autoneg_wait_to_complete); +static s32 ixgbe_verify_fw_version_82599(struct ixgbe_hw *hw); +static s32 ixgbe_read_eeprom_82599(struct ixgbe_hw *hw, + u16 offset, u16 *data); +static s32 ixgbe_read_eeprom_buffer_82599(struct ixgbe_hw *hw, u16 offset, + u16 words, u16 *data); +static s32 ixgbe_read_i2c_byte_82599(struct ixgbe_hw *hw, u8 byte_offset, + u8 dev_addr, u8 *data); +static s32 ixgbe_write_i2c_byte_82599(struct ixgbe_hw *hw, u8 byte_offset, + u8 dev_addr, u8 data); + +void ixgbe_init_mac_link_ops_82599(struct ixgbe_hw *hw) +{ + struct ixgbe_mac_info *mac = &hw->mac; + + /* enable the laser control functions for SFP+ fiber */ + if (mac->ops.get_media_type(hw) == ixgbe_media_type_fiber) { + mac->ops.disable_tx_laser = + &ixgbe_disable_tx_laser_multispeed_fiber; + mac->ops.enable_tx_laser = + &ixgbe_enable_tx_laser_multispeed_fiber; + mac->ops.flap_tx_laser = &ixgbe_flap_tx_laser_multispeed_fiber; + + } else { + mac->ops.disable_tx_laser = NULL; + mac->ops.enable_tx_laser = NULL; + mac->ops.flap_tx_laser = NULL; + } + + if (hw->phy.multispeed_fiber) { + /* Set up dual speed SFP+ support */ + mac->ops.setup_link = &ixgbe_setup_mac_link_multispeed_fiber; + } else { + if ((ixgbe_get_media_type(hw) == ixgbe_media_type_backplane) && + (hw->phy.smart_speed == ixgbe_smart_speed_auto || + hw->phy.smart_speed == ixgbe_smart_speed_on) && + !ixgbe_verify_lesm_fw_enabled_82599(hw)) { + mac->ops.setup_link = &ixgbe_setup_mac_link_smartspeed; + } else { + mac->ops.setup_link = &ixgbe_setup_mac_link_82599; + } + } +} + +/** + * ixgbe_init_phy_ops_82599 - PHY/SFP specific init + * @hw: pointer to hardware structure + * + * Initialize any function pointers that were not able to be + * set during init_shared_code because the PHY/SFP type was + * not known. Perform the SFP init if necessary. + * + **/ +s32 ixgbe_init_phy_ops_82599(struct ixgbe_hw *hw) +{ + struct ixgbe_mac_info *mac = &hw->mac; + struct ixgbe_phy_info *phy = &hw->phy; + s32 ret_val = 0; + u32 esdp; + + if (hw->device_id == IXGBE_DEV_ID_82599_QSFP_SF_QP) { + /* Store flag indicating I2C bus access control unit. */ + hw->phy.qsfp_shared_i2c_bus = TRUE; + + /* Initialize access to QSFP+ I2C bus */ + esdp = IXGBE_READ_REG(hw, IXGBE_ESDP); + esdp |= IXGBE_ESDP_SDP0_DIR; + esdp &= ~IXGBE_ESDP_SDP1_DIR; + esdp &= ~IXGBE_ESDP_SDP0; + esdp &= ~IXGBE_ESDP_SDP0_NATIVE; + esdp &= ~IXGBE_ESDP_SDP1_NATIVE; + IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp); + IXGBE_WRITE_FLUSH(hw); + + phy->ops.read_i2c_byte = &ixgbe_read_i2c_byte_82599; + phy->ops.write_i2c_byte = &ixgbe_write_i2c_byte_82599; + } + /* Identify the PHY or SFP module */ + ret_val = phy->ops.identify(hw); + if (ret_val == IXGBE_ERR_SFP_NOT_SUPPORTED) + goto init_phy_ops_out; + + /* Setup function pointers based on detected SFP module and speeds */ + ixgbe_init_mac_link_ops_82599(hw); + if (hw->phy.sfp_type != ixgbe_sfp_type_unknown) + hw->phy.ops.reset = NULL; + + /* If copper media, overwrite with copper function pointers */ + if (mac->ops.get_media_type(hw) == ixgbe_media_type_copper) { + mac->ops.setup_link = &ixgbe_setup_copper_link_82599; + mac->ops.get_link_capabilities = + &ixgbe_get_copper_link_capabilities_generic; + } + + /* Set necessary function pointers based on phy type */ + switch (hw->phy.type) { + case ixgbe_phy_tn: + phy->ops.setup_link = &ixgbe_setup_phy_link_tnx; + phy->ops.check_link = &ixgbe_check_phy_link_tnx; + phy->ops.get_firmware_version = + &ixgbe_get_phy_firmware_version_tnx; + break; + default: + break; + } +init_phy_ops_out: + return ret_val; +} + +s32 ixgbe_setup_sfp_modules_82599(struct ixgbe_hw *hw) +{ + s32 ret_val = 0; + u32 reg_anlp1 = 0; + u32 i = 0; + u16 list_offset, data_offset, data_value; + + if (hw->phy.sfp_type != ixgbe_sfp_type_unknown) { + ixgbe_init_mac_link_ops_82599(hw); + + hw->phy.ops.reset = NULL; + + ret_val = ixgbe_get_sfp_init_sequence_offsets(hw, &list_offset, + &data_offset); + if (ret_val != 0) + goto setup_sfp_out; + + /* PHY config will finish before releasing the semaphore */ + ret_val = hw->mac.ops.acquire_swfw_sync(hw, + IXGBE_GSSR_MAC_CSR_SM); + if (ret_val != 0) { + ret_val = IXGBE_ERR_SWFW_SYNC; + goto setup_sfp_out; + } + + hw->eeprom.ops.read(hw, ++data_offset, &data_value); + while (data_value != 0xffff) { + IXGBE_WRITE_REG(hw, IXGBE_CORECTL, data_value); + IXGBE_WRITE_FLUSH(hw); + hw->eeprom.ops.read(hw, ++data_offset, &data_value); + } + + /* Release the semaphore */ + hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_MAC_CSR_SM); + /* Delay obtaining semaphore again to allow FW access */ + msleep(hw->eeprom.semaphore_delay); + + /* Now restart DSP by setting Restart_AN and clearing LMS */ + IXGBE_WRITE_REG(hw, IXGBE_AUTOC, ((IXGBE_READ_REG(hw, + IXGBE_AUTOC) & ~IXGBE_AUTOC_LMS_MASK) | + IXGBE_AUTOC_AN_RESTART)); + + /* Wait for AN to leave state 0 */ + for (i = 0; i < 10; i++) { + msleep(4); + reg_anlp1 = IXGBE_READ_REG(hw, IXGBE_ANLP1); + if (reg_anlp1 & IXGBE_ANLP1_AN_STATE_MASK) + break; + } + if (!(reg_anlp1 & IXGBE_ANLP1_AN_STATE_MASK)) { + hw_dbg(hw, "sfp module setup not complete\n"); + ret_val = IXGBE_ERR_SFP_SETUP_NOT_COMPLETE; + goto setup_sfp_out; + } + + /* Restart DSP by setting Restart_AN and return to SFI mode */ + IXGBE_WRITE_REG(hw, IXGBE_AUTOC, (IXGBE_READ_REG(hw, + IXGBE_AUTOC) | IXGBE_AUTOC_LMS_10G_SERIAL | + IXGBE_AUTOC_AN_RESTART)); + } + +setup_sfp_out: + return ret_val; +} + +/** + * ixgbe_init_ops_82599 - Inits func ptrs and MAC type + * @hw: pointer to hardware structure + * + * Initialize the function pointers and assign the MAC type for 82599. + * Does not touch the hardware. + **/ + +s32 ixgbe_init_ops_82599(struct ixgbe_hw *hw) +{ + struct ixgbe_mac_info *mac = &hw->mac; + struct ixgbe_phy_info *phy = &hw->phy; + struct ixgbe_eeprom_info *eeprom = &hw->eeprom; + s32 ret_val; + + ixgbe_init_phy_ops_generic(hw); + ret_val = ixgbe_init_ops_generic(hw); + + /* PHY */ + phy->ops.identify = &ixgbe_identify_phy_82599; + phy->ops.init = &ixgbe_init_phy_ops_82599; + + /* MAC */ + mac->ops.reset_hw = &ixgbe_reset_hw_82599; + mac->ops.get_media_type = &ixgbe_get_media_type_82599; + mac->ops.get_supported_physical_layer = + &ixgbe_get_supported_physical_layer_82599; + mac->ops.disable_sec_rx_path = &ixgbe_disable_sec_rx_path_generic; + mac->ops.enable_sec_rx_path = &ixgbe_enable_sec_rx_path_generic; + mac->ops.enable_rx_dma = &ixgbe_enable_rx_dma_82599; + mac->ops.read_analog_reg8 = &ixgbe_read_analog_reg8_82599; + mac->ops.write_analog_reg8 = &ixgbe_write_analog_reg8_82599; + mac->ops.start_hw = &ixgbe_start_hw_82599; + mac->ops.get_san_mac_addr = &ixgbe_get_san_mac_addr_generic; + mac->ops.set_san_mac_addr = &ixgbe_set_san_mac_addr_generic; + mac->ops.get_device_caps = &ixgbe_get_device_caps_generic; + mac->ops.get_wwn_prefix = &ixgbe_get_wwn_prefix_generic; + mac->ops.get_fcoe_boot_status = &ixgbe_get_fcoe_boot_status_generic; + + /* RAR, Multicast, VLAN */ + mac->ops.set_vmdq = &ixgbe_set_vmdq_generic; + mac->ops.set_vmdq_san_mac = &ixgbe_set_vmdq_san_mac_generic; + mac->ops.clear_vmdq = &ixgbe_clear_vmdq_generic; + mac->ops.insert_mac_addr = &ixgbe_insert_mac_addr_generic; + mac->rar_highwater = 1; + mac->ops.set_vfta = &ixgbe_set_vfta_generic; + mac->ops.set_vlvf = &ixgbe_set_vlvf_generic; + mac->ops.clear_vfta = &ixgbe_clear_vfta_generic; + mac->ops.init_uta_tables = &ixgbe_init_uta_tables_generic; + mac->ops.setup_sfp = &ixgbe_setup_sfp_modules_82599; + mac->ops.set_mac_anti_spoofing = &ixgbe_set_mac_anti_spoofing; + mac->ops.set_vlan_anti_spoofing = &ixgbe_set_vlan_anti_spoofing; + + /* Link */ + mac->ops.get_link_capabilities = &ixgbe_get_link_capabilities_82599; + mac->ops.check_link = &ixgbe_check_mac_link_generic; + mac->ops.setup_rxpba = &ixgbe_set_rxpba_generic; + ixgbe_init_mac_link_ops_82599(hw); + + mac->mcft_size = 128; + mac->vft_size = 128; + mac->num_rar_entries = 128; + mac->rx_pb_size = 512; + mac->max_tx_queues = 128; + mac->max_rx_queues = 128; + mac->max_msix_vectors = ixgbe_get_pcie_msix_count_generic(hw); + + mac->arc_subsystem_valid = (IXGBE_READ_REG(hw, IXGBE_FWSM) & + IXGBE_FWSM_MODE_MASK) ? true : false; + + //hw->mbx.ops.init_params = ixgbe_init_mbx_params_pf; + + /* EEPROM */ + eeprom->ops.read = &ixgbe_read_eeprom_82599; + eeprom->ops.read_buffer = &ixgbe_read_eeprom_buffer_82599; + + /* Manageability interface */ + mac->ops.set_fw_drv_ver = &ixgbe_set_fw_drv_ver_generic; + + mac->ops.get_thermal_sensor_data = + &ixgbe_get_thermal_sensor_data_generic; + mac->ops.init_thermal_sensor_thresh = + &ixgbe_init_thermal_sensor_thresh_generic; + + return ret_val; +} + +/** + * ixgbe_get_link_capabilities_82599 - Determines link capabilities + * @hw: pointer to hardware structure + * @speed: pointer to link speed + * @negotiation: true when autoneg or autotry is enabled + * + * Determines the link capabilities by reading the AUTOC register. + **/ +s32 ixgbe_get_link_capabilities_82599(struct ixgbe_hw *hw, + ixgbe_link_speed *speed, + bool *negotiation) +{ + s32 status = 0; + u32 autoc = 0; + + /* Check if 1G SFP module. */ + if (hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core0 || + hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core1 || + hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0 || + hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1) { + *speed = IXGBE_LINK_SPEED_1GB_FULL; + *negotiation = true; + goto out; + } + + /* + * Determine link capabilities based on the stored value of AUTOC, + * which represents EEPROM defaults. If AUTOC value has not + * been stored, use the current register values. + */ + if (hw->mac.orig_link_settings_stored) + autoc = hw->mac.orig_autoc; + else + autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC); + + switch (autoc & IXGBE_AUTOC_LMS_MASK) { + case IXGBE_AUTOC_LMS_1G_LINK_NO_AN: + *speed = IXGBE_LINK_SPEED_1GB_FULL; + *negotiation = false; + break; + + case IXGBE_AUTOC_LMS_10G_LINK_NO_AN: + *speed = IXGBE_LINK_SPEED_10GB_FULL; + *negotiation = false; + break; + + case IXGBE_AUTOC_LMS_1G_AN: + *speed = IXGBE_LINK_SPEED_1GB_FULL; + *negotiation = true; + break; + + case IXGBE_AUTOC_LMS_10G_SERIAL: + *speed = IXGBE_LINK_SPEED_10GB_FULL; + *negotiation = false; + break; + + case IXGBE_AUTOC_LMS_KX4_KX_KR: + case IXGBE_AUTOC_LMS_KX4_KX_KR_1G_AN: + *speed = IXGBE_LINK_SPEED_UNKNOWN; + if (autoc & IXGBE_AUTOC_KR_SUPP) + *speed |= IXGBE_LINK_SPEED_10GB_FULL; + if (autoc & IXGBE_AUTOC_KX4_SUPP) + *speed |= IXGBE_LINK_SPEED_10GB_FULL; + if (autoc & IXGBE_AUTOC_KX_SUPP) + *speed |= IXGBE_LINK_SPEED_1GB_FULL; + *negotiation = true; + break; + + case IXGBE_AUTOC_LMS_KX4_KX_KR_SGMII: + *speed = IXGBE_LINK_SPEED_100_FULL; + if (autoc & IXGBE_AUTOC_KR_SUPP) + *speed |= IXGBE_LINK_SPEED_10GB_FULL; + if (autoc & IXGBE_AUTOC_KX4_SUPP) + *speed |= IXGBE_LINK_SPEED_10GB_FULL; + if (autoc & IXGBE_AUTOC_KX_SUPP) + *speed |= IXGBE_LINK_SPEED_1GB_FULL; + *negotiation = true; + break; + + case IXGBE_AUTOC_LMS_SGMII_1G_100M: + *speed = IXGBE_LINK_SPEED_1GB_FULL | IXGBE_LINK_SPEED_100_FULL; + *negotiation = false; + break; + + default: + status = IXGBE_ERR_LINK_SETUP; + goto out; + break; + } + + if (hw->phy.multispeed_fiber) { + *speed |= IXGBE_LINK_SPEED_10GB_FULL | + IXGBE_LINK_SPEED_1GB_FULL; + *negotiation = true; + } + +out: + return status; +} + +/** + * ixgbe_get_media_type_82599 - Get media type + * @hw: pointer to hardware structure + * + * Returns the media type (fiber, copper, backplane) + **/ +enum ixgbe_media_type ixgbe_get_media_type_82599(struct ixgbe_hw *hw) +{ + enum ixgbe_media_type media_type; + + /* Detect if there is a copper PHY attached. */ + switch (hw->phy.type) { + case ixgbe_phy_cu_unknown: + case ixgbe_phy_tn: + media_type = ixgbe_media_type_copper; + goto out; + default: + break; + } + + switch (hw->device_id) { + case IXGBE_DEV_ID_82599_KX4: + case IXGBE_DEV_ID_82599_KX4_MEZZ: + case IXGBE_DEV_ID_82599_COMBO_BACKPLANE: + case IXGBE_DEV_ID_82599_KR: + case IXGBE_DEV_ID_82599_BACKPLANE_FCOE: + case IXGBE_DEV_ID_82599_XAUI_LOM: + /* Default device ID is mezzanine card KX/KX4 */ + media_type = ixgbe_media_type_backplane; + break; + case IXGBE_DEV_ID_82599_SFP: + case IXGBE_DEV_ID_82599_SFP_FCOE: + case IXGBE_DEV_ID_82599_SFP_EM: + case IXGBE_DEV_ID_82599_SFP_SF2: + case IXGBE_DEV_ID_82599EN_SFP: + media_type = ixgbe_media_type_fiber; + break; + case IXGBE_DEV_ID_82599_CX4: + media_type = ixgbe_media_type_cx4; + break; + case IXGBE_DEV_ID_82599_T3_LOM: + media_type = ixgbe_media_type_copper; + break; + case IXGBE_DEV_ID_82599_LS: + media_type = ixgbe_media_type_fiber_lco; + break; + case IXGBE_DEV_ID_82599_QSFP_SF_QP: + media_type = ixgbe_media_type_fiber_qsfp; + break; + default: + media_type = ixgbe_media_type_unknown; + break; + } +out: + return media_type; +} + +/** + * ixgbe_start_mac_link_82599 - Setup MAC link settings + * @hw: pointer to hardware structure + * @autoneg_wait_to_complete: true when waiting for completion is needed + * + * Configures link settings based on values in the ixgbe_hw struct. + * Restarts the link. Performs autonegotiation if needed. + **/ +s32 ixgbe_start_mac_link_82599(struct ixgbe_hw *hw, + bool autoneg_wait_to_complete) +{ + u32 autoc_reg; + u32 links_reg = 0; + u32 i; + s32 status = 0; + + /* Restart link */ + autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC); + autoc_reg |= IXGBE_AUTOC_AN_RESTART; + IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc_reg); + + /* Only poll for autoneg to complete if specified to do so */ + if (autoneg_wait_to_complete) { + if ((autoc_reg & IXGBE_AUTOC_LMS_MASK) == + IXGBE_AUTOC_LMS_KX4_KX_KR || + (autoc_reg & IXGBE_AUTOC_LMS_MASK) == + IXGBE_AUTOC_LMS_KX4_KX_KR_1G_AN || + (autoc_reg & IXGBE_AUTOC_LMS_MASK) == + IXGBE_AUTOC_LMS_KX4_KX_KR_SGMII) { + for (i = 0; i < IXGBE_AUTO_NEG_TIME; i++) { + links_reg = IXGBE_READ_REG(hw, IXGBE_LINKS); + if (links_reg & IXGBE_LINKS_KX_AN_COMP) + break; + msleep(100); + } + if (!(links_reg & IXGBE_LINKS_KX_AN_COMP)) { + status = IXGBE_ERR_AUTONEG_NOT_COMPLETE; + hw_dbg(hw, "Autoneg did not complete.\n"); + } + } + } + + /* Add delay to filter out noises during initial link setup */ + msleep(50); + + return status; +} + +/** + * ixgbe_disable_tx_laser_multispeed_fiber - Disable Tx laser + * @hw: pointer to hardware structure + * + * The base drivers may require better control over SFP+ module + * PHY states. This includes selectively shutting down the Tx + * laser on the PHY, effectively halting physical link. + **/ +void ixgbe_disable_tx_laser_multispeed_fiber(struct ixgbe_hw *hw) +{ + u32 esdp_reg = IXGBE_READ_REG(hw, IXGBE_ESDP); + + /* Disable tx laser; allow 100us to go dark per spec */ + esdp_reg |= IXGBE_ESDP_SDP3; + IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg); + IXGBE_WRITE_FLUSH(hw); + udelay(100); +} + +/** + * ixgbe_enable_tx_laser_multispeed_fiber - Enable Tx laser + * @hw: pointer to hardware structure + * + * The base drivers may require better control over SFP+ module + * PHY states. This includes selectively turning on the Tx + * laser on the PHY, effectively starting physical link. + **/ +void ixgbe_enable_tx_laser_multispeed_fiber(struct ixgbe_hw *hw) +{ + u32 esdp_reg = IXGBE_READ_REG(hw, IXGBE_ESDP); + + /* Enable tx laser; allow 100ms to light up */ + esdp_reg &= ~IXGBE_ESDP_SDP3; + IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg); + IXGBE_WRITE_FLUSH(hw); + msleep(100); +} + +/** + * ixgbe_flap_tx_laser_multispeed_fiber - Flap Tx laser + * @hw: pointer to hardware structure + * + * When the driver changes the link speeds that it can support, + * it sets autotry_restart to true to indicate that we need to + * initiate a new autotry session with the link partner. To do + * so, we set the speed then disable and re-enable the tx laser, to + * alert the link partner that it also needs to restart autotry on its + * end. This is consistent with true clause 37 autoneg, which also + * involves a loss of signal. + **/ +void ixgbe_flap_tx_laser_multispeed_fiber(struct ixgbe_hw *hw) +{ + if (hw->mac.autotry_restart) { + ixgbe_disable_tx_laser_multispeed_fiber(hw); + ixgbe_enable_tx_laser_multispeed_fiber(hw); + hw->mac.autotry_restart = false; + } +} + +/** + * ixgbe_setup_mac_link_multispeed_fiber - Set MAC link speed + * @hw: pointer to hardware structure + * @speed: new link speed + * @autoneg: true if autonegotiation enabled + * @autoneg_wait_to_complete: true when waiting for completion is needed + * + * Set the link speed in the AUTOC register and restarts link. + **/ +s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw, + ixgbe_link_speed speed, bool autoneg, + bool autoneg_wait_to_complete) +{ + s32 status = 0; + ixgbe_link_speed link_speed = IXGBE_LINK_SPEED_UNKNOWN; + ixgbe_link_speed highest_link_speed = IXGBE_LINK_SPEED_UNKNOWN; + u32 speedcnt = 0; + u32 esdp_reg = IXGBE_READ_REG(hw, IXGBE_ESDP); + u32 i = 0; + bool link_up = false; + bool negotiation; + + /* Mask off requested but non-supported speeds */ + status = ixgbe_get_link_capabilities(hw, &link_speed, &negotiation); + if (status != 0) + return status; + + speed &= link_speed; + + /* + * Try each speed one by one, highest priority first. We do this in + * software because 10gb fiber doesn't support speed autonegotiation. + */ + if (speed & IXGBE_LINK_SPEED_10GB_FULL) { + speedcnt++; + highest_link_speed = IXGBE_LINK_SPEED_10GB_FULL; + + /* If we already have link at this speed, just jump out */ + status = ixgbe_check_link(hw, &link_speed, &link_up, false); + if (status != 0) + return status; + + if ((link_speed == IXGBE_LINK_SPEED_10GB_FULL) && link_up) + goto out; + + /* Set the module link speed */ + esdp_reg |= (IXGBE_ESDP_SDP5_DIR | IXGBE_ESDP_SDP5); + IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg); + IXGBE_WRITE_FLUSH(hw); + + /* Allow module to change analog characteristics (1G->10G) */ + msleep(40); + + status = ixgbe_setup_mac_link_82599(hw, + IXGBE_LINK_SPEED_10GB_FULL, + autoneg, + autoneg_wait_to_complete); + if (status != 0) + return status; + + /* Flap the tx laser if it has not already been done */ + ixgbe_flap_tx_laser(hw); + + /* + * Wait for the controller to acquire link. Per IEEE 802.3ap, + * Section 73.10.2, we may have to wait up to 500ms if KR is + * attempted. 82599 uses the same timing for 10g SFI. + */ + for (i = 0; i < 5; i++) { + /* Wait for the link partner to also set speed */ + msleep(100); + + /* If we have link, just jump out */ + status = ixgbe_check_link(hw, &link_speed, + &link_up, false); + if (status != 0) + return status; + + if (link_up) + goto out; + } + } + + if (speed & IXGBE_LINK_SPEED_1GB_FULL) { + speedcnt++; + if (highest_link_speed == IXGBE_LINK_SPEED_UNKNOWN) + highest_link_speed = IXGBE_LINK_SPEED_1GB_FULL; + + /* If we already have link at this speed, just jump out */ + status = ixgbe_check_link(hw, &link_speed, &link_up, false); + if (status != 0) + return status; + + if ((link_speed == IXGBE_LINK_SPEED_1GB_FULL) && link_up) + goto out; + + /* Set the module link speed */ + esdp_reg &= ~IXGBE_ESDP_SDP5; + esdp_reg |= IXGBE_ESDP_SDP5_DIR; + IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp_reg); + IXGBE_WRITE_FLUSH(hw); + + /* Allow module to change analog characteristics (10G->1G) */ + msleep(40); + + status = ixgbe_setup_mac_link_82599(hw, + IXGBE_LINK_SPEED_1GB_FULL, + autoneg, + autoneg_wait_to_complete); + if (status != 0) + return status; + + /* Flap the tx laser if it has not already been done */ + ixgbe_flap_tx_laser(hw); + + /* Wait for the link partner to also set speed */ + msleep(100); + + /* If we have link, just jump out */ + status = ixgbe_check_link(hw, &link_speed, &link_up, false); + if (status != 0) + return status; + + if (link_up) + goto out; + } + + /* + * We didn't get link. Configure back to the highest speed we tried, + * (if there was more than one). We call ourselves back with just the + * single highest speed that the user requested. + */ + if (speedcnt > 1) + status = ixgbe_setup_mac_link_multispeed_fiber(hw, + highest_link_speed, autoneg, autoneg_wait_to_complete); + +out: + /* Set autoneg_advertised value based on input link speed */ + hw->phy.autoneg_advertised = 0; + + if (speed & IXGBE_LINK_SPEED_10GB_FULL) + hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_10GB_FULL; + + if (speed & IXGBE_LINK_SPEED_1GB_FULL) + hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_1GB_FULL; + + return status; +} + +/** + * ixgbe_setup_mac_link_smartspeed - Set MAC link speed using SmartSpeed + * @hw: pointer to hardware structure + * @speed: new link speed + * @autoneg: true if autonegotiation enabled + * @autoneg_wait_to_complete: true when waiting for completion is needed + * + * Implements the Intel SmartSpeed algorithm. + **/ +s32 ixgbe_setup_mac_link_smartspeed(struct ixgbe_hw *hw, + ixgbe_link_speed speed, bool autoneg, + bool autoneg_wait_to_complete) +{ + s32 status = 0; + ixgbe_link_speed link_speed = IXGBE_LINK_SPEED_UNKNOWN; + s32 i, j; + bool link_up = false; + u32 autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC); + + /* Set autoneg_advertised value based on input link speed */ + hw->phy.autoneg_advertised = 0; + + if (speed & IXGBE_LINK_SPEED_10GB_FULL) + hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_10GB_FULL; + + if (speed & IXGBE_LINK_SPEED_1GB_FULL) + hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_1GB_FULL; + + if (speed & IXGBE_LINK_SPEED_100_FULL) + hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_100_FULL; + + /* + * Implement Intel SmartSpeed algorithm. SmartSpeed will reduce the + * autoneg advertisement if link is unable to be established at the + * highest negotiated rate. This can sometimes happen due to integrity + * issues with the physical media connection. + */ + + /* First, try to get link with full advertisement */ + hw->phy.smart_speed_active = false; + for (j = 0; j < IXGBE_SMARTSPEED_MAX_RETRIES; j++) { + status = ixgbe_setup_mac_link_82599(hw, speed, autoneg, + autoneg_wait_to_complete); + if (status != 0) + goto out; + + /* + * Wait for the controller to acquire link. Per IEEE 802.3ap, + * Section 73.10.2, we may have to wait up to 500ms if KR is + * attempted, or 200ms if KX/KX4/BX/BX4 is attempted, per + * Table 9 in the AN MAS. + */ + for (i = 0; i < 5; i++) { + msleep(100); + + /* If we have link, just jump out */ + status = ixgbe_check_link(hw, &link_speed, &link_up, + false); + if (status != 0) + goto out; + + if (link_up) + goto out; + } + } + + /* + * We didn't get link. If we advertised KR plus one of KX4/KX + * (or BX4/BX), then disable KR and try again. + */ + if (((autoc_reg & IXGBE_AUTOC_KR_SUPP) == 0) || + ((autoc_reg & IXGBE_AUTOC_KX4_KX_SUPP_MASK) == 0)) + goto out; + + /* Turn SmartSpeed on to disable KR support */ + hw->phy.smart_speed_active = true; + status = ixgbe_setup_mac_link_82599(hw, speed, autoneg, + autoneg_wait_to_complete); + if (status != 0) + goto out; + + /* + * Wait for the controller to acquire link. 600ms will allow for + * the AN link_fail_inhibit_timer as well for multiple cycles of + * parallel detect, both 10g and 1g. This allows for the maximum + * connect attempts as defined in the AN MAS table 73-7. + */ + for (i = 0; i < 6; i++) { + msleep(100); + + /* If we have link, just jump out */ + status = ixgbe_check_link(hw, &link_speed, &link_up, false); + if (status != 0) + goto out; + + if (link_up) + goto out; + } + + /* We didn't get link. Turn SmartSpeed back off. */ + hw->phy.smart_speed_active = false; + status = ixgbe_setup_mac_link_82599(hw, speed, autoneg, + autoneg_wait_to_complete); + +out: + if (link_up && (link_speed == IXGBE_LINK_SPEED_1GB_FULL)) + hw_dbg(hw, "Smartspeed has downgraded the link speed " + "from the maximum advertised\n"); + return status; +} + +/** + * ixgbe_setup_mac_link_82599 - Set MAC link speed + * @hw: pointer to hardware structure + * @speed: new link speed + * @autoneg: true if autonegotiation enabled + * @autoneg_wait_to_complete: true when waiting for completion is needed + * + * Set the link speed in the AUTOC register and restarts link. + **/ +s32 ixgbe_setup_mac_link_82599(struct ixgbe_hw *hw, + ixgbe_link_speed speed, bool autoneg, + bool autoneg_wait_to_complete) +{ + s32 status = 0; + u32 autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC); + u32 autoc2 = IXGBE_READ_REG(hw, IXGBE_AUTOC2); + u32 start_autoc = autoc; + u32 orig_autoc = 0; + u32 link_mode = autoc & IXGBE_AUTOC_LMS_MASK; + u32 pma_pmd_1g = autoc & IXGBE_AUTOC_1G_PMA_PMD_MASK; + u32 pma_pmd_10g_serial = autoc2 & IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_MASK; + u32 links_reg = 0; + u32 i; + ixgbe_link_speed link_capabilities = IXGBE_LINK_SPEED_UNKNOWN; + + /* Check to see if speed passed in is supported. */ + status = ixgbe_get_link_capabilities(hw, &link_capabilities, &autoneg); + if (status != 0) + goto out; + + speed &= link_capabilities; + + if (speed == IXGBE_LINK_SPEED_UNKNOWN) { + status = IXGBE_ERR_LINK_SETUP; + goto out; + } + + /* Use stored value (EEPROM defaults) of AUTOC to find KR/KX4 support*/ + if (hw->mac.orig_link_settings_stored) + orig_autoc = hw->mac.orig_autoc; + else + orig_autoc = autoc; + + if (link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR || + link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR_1G_AN || + link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR_SGMII) { + /* Set KX4/KX/KR support according to speed requested */ + autoc &= ~(IXGBE_AUTOC_KX4_KX_SUPP_MASK | IXGBE_AUTOC_KR_SUPP); + if (speed & IXGBE_LINK_SPEED_10GB_FULL) + if (orig_autoc & IXGBE_AUTOC_KX4_SUPP) + autoc |= IXGBE_AUTOC_KX4_SUPP; + if ((orig_autoc & IXGBE_AUTOC_KR_SUPP) && + (hw->phy.smart_speed_active == false)) + autoc |= IXGBE_AUTOC_KR_SUPP; + if (speed & IXGBE_LINK_SPEED_1GB_FULL) + autoc |= IXGBE_AUTOC_KX_SUPP; + } else if ((pma_pmd_1g == IXGBE_AUTOC_1G_SFI) && + (link_mode == IXGBE_AUTOC_LMS_1G_LINK_NO_AN || + link_mode == IXGBE_AUTOC_LMS_1G_AN)) { + /* Switch from 1G SFI to 10G SFI if requested */ + if ((speed == IXGBE_LINK_SPEED_10GB_FULL) && + (pma_pmd_10g_serial == IXGBE_AUTOC2_10G_SFI)) { + autoc &= ~IXGBE_AUTOC_LMS_MASK; + autoc |= IXGBE_AUTOC_LMS_10G_SERIAL; + } + } else if ((pma_pmd_10g_serial == IXGBE_AUTOC2_10G_SFI) && + (link_mode == IXGBE_AUTOC_LMS_10G_SERIAL)) { + /* Switch from 10G SFI to 1G SFI if requested */ + if ((speed == IXGBE_LINK_SPEED_1GB_FULL) && + (pma_pmd_1g == IXGBE_AUTOC_1G_SFI)) { + autoc &= ~IXGBE_AUTOC_LMS_MASK; + if (autoneg) + autoc |= IXGBE_AUTOC_LMS_1G_AN; + else + autoc |= IXGBE_AUTOC_LMS_1G_LINK_NO_AN; + } + } + + if (autoc != start_autoc) { + /* Restart link */ + autoc |= IXGBE_AUTOC_AN_RESTART; + IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc); + + /* Only poll for autoneg to complete if specified to do so */ + if (autoneg_wait_to_complete) { + if (link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR || + link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR_1G_AN || + link_mode == IXGBE_AUTOC_LMS_KX4_KX_KR_SGMII) { + for (i = 0; i < IXGBE_AUTO_NEG_TIME; i++) { + links_reg = + IXGBE_READ_REG(hw, IXGBE_LINKS); + if (links_reg & IXGBE_LINKS_KX_AN_COMP) + break; + msleep(100); + } + if (!(links_reg & IXGBE_LINKS_KX_AN_COMP)) { + status = + IXGBE_ERR_AUTONEG_NOT_COMPLETE; + hw_dbg(hw, "Autoneg did not complete.\n"); + } + } + } + + /* Add delay to filter out noises during initial link setup */ + msleep(50); + } + +out: + return status; +} + +/** + * ixgbe_setup_copper_link_82599 - Set the PHY autoneg advertised field + * @hw: pointer to hardware structure + * @speed: new link speed + * @autoneg: true if autonegotiation enabled + * @autoneg_wait_to_complete: true if waiting is needed to complete + * + * Restarts link on PHY and MAC based on settings passed in. + **/ +static s32 ixgbe_setup_copper_link_82599(struct ixgbe_hw *hw, + ixgbe_link_speed speed, + bool autoneg, + bool autoneg_wait_to_complete) +{ + s32 status; + + /* Setup the PHY according to input speed */ + status = hw->phy.ops.setup_link_speed(hw, speed, autoneg, + autoneg_wait_to_complete); + /* Set up MAC */ + ixgbe_start_mac_link_82599(hw, autoneg_wait_to_complete); + + return status; +} + +/** + * ixgbe_reset_hw_82599 - Perform hardware reset + * @hw: pointer to hardware structure + * + * Resets the hardware by resetting the transmit and receive units, masks + * and clears all interrupts, perform a PHY reset, and perform a link (MAC) + * reset. + **/ +s32 ixgbe_reset_hw_82599(struct ixgbe_hw *hw) +{ +// ixgbe_link_speed link_speed; + s32 status = 0; +// u32 ctrl, i, autoc, autoc2; +// bool link_up = false; + +#if 0 + /* Call adapter stop to disable tx/rx and clear interrupts */ + status = hw->mac.ops.stop_adapter(hw); + if (status != 0) + goto reset_hw_out; + + /* flush pending Tx transactions */ + ixgbe_clear_tx_pending(hw); + + /* PHY ops must be identified and initialized prior to reset */ + + /* Identify PHY and related function pointers */ + status = hw->phy.ops.init(hw); + + if (status == IXGBE_ERR_SFP_NOT_SUPPORTED) + goto reset_hw_out; + + /* Setup SFP module if there is one present. */ + if (hw->phy.sfp_setup_needed) { + status = hw->mac.ops.setup_sfp(hw); + hw->phy.sfp_setup_needed = false; + } + + if (status == IXGBE_ERR_SFP_NOT_SUPPORTED) + goto reset_hw_out; + + /* Reset PHY */ + if (hw->phy.reset_disable == false && hw->phy.ops.reset != NULL) + hw->phy.ops.reset(hw); + +mac_reset_top: + /* + * Issue global reset to the MAC. Needs to be SW reset if link is up. + * If link reset is used when link is up, it might reset the PHY when + * mng is using it. If link is down or the flag to force full link + * reset is set, then perform link reset. + */ + ctrl = IXGBE_CTRL_LNK_RST; + if (!hw->force_full_reset) { + hw->mac.ops.check_link(hw, &link_speed, &link_up, false); + if (link_up) + ctrl = IXGBE_CTRL_RST; + } + + ctrl |= IXGBE_READ_REG(hw, IXGBE_CTRL); + IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl); + IXGBE_WRITE_FLUSH(hw); + + /* Poll for reset bit to self-clear indicating reset is complete */ + for (i = 0; i < 10; i++) { + udelay(1); + ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL); + if (!(ctrl & IXGBE_CTRL_RST_MASK)) + break; + } + + if (ctrl & IXGBE_CTRL_RST_MASK) { + status = IXGBE_ERR_RESET_FAILED; + hw_dbg(hw, "Reset polling failed to complete.\n"); + } + + msleep(50); + + /* + * Double resets are required for recovery from certain error + * conditions. Between resets, it is necessary to stall to allow time + * for any pending HW events to complete. + */ + if (hw->mac.flags & IXGBE_FLAGS_DOUBLE_RESET_REQUIRED) { + hw->mac.flags &= ~IXGBE_FLAGS_DOUBLE_RESET_REQUIRED; + goto mac_reset_top; + } + + /* + * Store the original AUTOC/AUTOC2 values if they have not been + * stored off yet. Otherwise restore the stored original + * values since the reset operation sets back to defaults. + */ + autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC); + autoc2 = IXGBE_READ_REG(hw, IXGBE_AUTOC2); + if (hw->mac.orig_link_settings_stored == false) { + hw->mac.orig_autoc = autoc; + hw->mac.orig_autoc2 = autoc2; + hw->mac.orig_link_settings_stored = true; + } else { + if (autoc != hw->mac.orig_autoc) + IXGBE_WRITE_REG(hw, IXGBE_AUTOC, (hw->mac.orig_autoc | + IXGBE_AUTOC_AN_RESTART)); + + if ((autoc2 & IXGBE_AUTOC2_UPPER_MASK) != + (hw->mac.orig_autoc2 & IXGBE_AUTOC2_UPPER_MASK)) { + autoc2 &= ~IXGBE_AUTOC2_UPPER_MASK; + autoc2 |= (hw->mac.orig_autoc2 & + IXGBE_AUTOC2_UPPER_MASK); + IXGBE_WRITE_REG(hw, IXGBE_AUTOC2, autoc2); + } + } +#endif + + /* Store the permanent mac address */ + hw->mac.ops.get_mac_addr(hw, hw->mac.perm_addr); + + /* + * Store MAC address from RAR0, clear receive address registers, and + * clear the multicast table. Also reset num_rar_entries to 128, + * since we modify this value when programming the SAN MAC address. + */ + hw->mac.num_rar_entries = 128; + hw->mac.ops.init_rx_addrs(hw); + + /* Store the permanent SAN mac address */ + hw->mac.ops.get_san_mac_addr(hw, hw->mac.san_addr); + + /* Add the SAN MAC address to the RAR only if it's a valid address */ + if (ixgbe_validate_mac_addr(hw->mac.san_addr) == 0) { + hw->mac.ops.set_rar(hw, hw->mac.num_rar_entries - 1, + hw->mac.san_addr, 0, IXGBE_RAH_AV); + + /* Save the SAN MAC RAR index */ + hw->mac.san_mac_rar_index = hw->mac.num_rar_entries - 1; + + /* Reserve the last RAR for the SAN MAC address */ + hw->mac.num_rar_entries--; + } + + /* Store the alternative WWNN/WWPN prefix */ + hw->mac.ops.get_wwn_prefix(hw, &hw->mac.wwnn_prefix, + &hw->mac.wwpn_prefix); + +//reset_hw_out: + return status; +} + +/** + * ixgbe_reinit_fdir_tables_82599 - Reinitialize Flow Director tables. + * @hw: pointer to hardware structure + **/ +s32 ixgbe_reinit_fdir_tables_82599(struct ixgbe_hw *hw) +{ + int i; + u32 fdirctrl = IXGBE_READ_REG(hw, IXGBE_FDIRCTRL); + fdirctrl &= ~IXGBE_FDIRCTRL_INIT_DONE; + + /* + * Before starting reinitialization process, + * FDIRCMD.CMD must be zero. + */ + for (i = 0; i < IXGBE_FDIRCMD_CMD_POLL; i++) { + if (!(IXGBE_READ_REG(hw, IXGBE_FDIRCMD) & + IXGBE_FDIRCMD_CMD_MASK)) + break; + udelay(10); + } + if (i >= IXGBE_FDIRCMD_CMD_POLL) { + hw_dbg(hw, "Flow Director previous command isn't complete, " + "aborting table re-initialization.\n"); + return IXGBE_ERR_FDIR_REINIT_FAILED; + } + + IXGBE_WRITE_REG(hw, IXGBE_FDIRFREE, 0); + IXGBE_WRITE_FLUSH(hw); + /* + * 82599 adapters flow director init flow cannot be restarted, + * Workaround 82599 silicon errata by performing the following steps + * before re-writing the FDIRCTRL control register with the same value. + * - write 1 to bit 8 of FDIRCMD register & + * - write 0 to bit 8 of FDIRCMD register + */ + IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD, + (IXGBE_READ_REG(hw, IXGBE_FDIRCMD) | + IXGBE_FDIRCMD_CLEARHT)); + IXGBE_WRITE_FLUSH(hw); + IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD, + (IXGBE_READ_REG(hw, IXGBE_FDIRCMD) & + ~IXGBE_FDIRCMD_CLEARHT)); + IXGBE_WRITE_FLUSH(hw); + /* + * Clear FDIR Hash register to clear any leftover hashes + * waiting to be programmed. + */ + IXGBE_WRITE_REG(hw, IXGBE_FDIRHASH, 0x00); + IXGBE_WRITE_FLUSH(hw); + + IXGBE_WRITE_REG(hw, IXGBE_FDIRCTRL, fdirctrl); + IXGBE_WRITE_FLUSH(hw); + + /* Poll init-done after we write FDIRCTRL register */ + for (i = 0; i < IXGBE_FDIR_INIT_DONE_POLL; i++) { + if (IXGBE_READ_REG(hw, IXGBE_FDIRCTRL) & + IXGBE_FDIRCTRL_INIT_DONE) + break; + udelay(10); + } + if (i >= IXGBE_FDIR_INIT_DONE_POLL) { + hw_dbg(hw, "Flow Director Signature poll time exceeded!\n"); + return IXGBE_ERR_FDIR_REINIT_FAILED; + } + + /* Clear FDIR statistics registers (read to clear) */ + IXGBE_READ_REG(hw, IXGBE_FDIRUSTAT); + IXGBE_READ_REG(hw, IXGBE_FDIRFSTAT); + IXGBE_READ_REG(hw, IXGBE_FDIRMATCH); + IXGBE_READ_REG(hw, IXGBE_FDIRMISS); + IXGBE_READ_REG(hw, IXGBE_FDIRLEN); + + return 0; +} + +/** + * ixgbe_fdir_enable_82599 - Initialize Flow Director control registers + * @hw: pointer to hardware structure + * @fdirctrl: value to write to flow director control register + **/ +static void ixgbe_fdir_enable_82599(struct ixgbe_hw *hw, u32 fdirctrl) +{ + int i; + + /* Prime the keys for hashing */ + IXGBE_WRITE_REG(hw, IXGBE_FDIRHKEY, IXGBE_ATR_BUCKET_HASH_KEY); + IXGBE_WRITE_REG(hw, IXGBE_FDIRSKEY, IXGBE_ATR_SIGNATURE_HASH_KEY); + + /* + * Poll init-done after we write the register. Estimated times: + * 10G: PBALLOC = 11b, timing is 60us + * 1G: PBALLOC = 11b, timing is 600us + * 100M: PBALLOC = 11b, timing is 6ms + * + * Multiple these timings by 4 if under full Rx load + * + * So we'll poll for IXGBE_FDIR_INIT_DONE_POLL times, sleeping for + * 1 msec per poll time. If we're at line rate and drop to 100M, then + * this might not finish in our poll time, but we can live with that + * for now. + */ + IXGBE_WRITE_REG(hw, IXGBE_FDIRCTRL, fdirctrl); + IXGBE_WRITE_FLUSH(hw); + for (i = 0; i < IXGBE_FDIR_INIT_DONE_POLL; i++) { + if (IXGBE_READ_REG(hw, IXGBE_FDIRCTRL) & + IXGBE_FDIRCTRL_INIT_DONE) + break; + msleep(1); + } + + if (i >= IXGBE_FDIR_INIT_DONE_POLL) + hw_dbg(hw, "Flow Director poll time exceeded!\n"); +} + +/** + * ixgbe_init_fdir_signature_82599 - Initialize Flow Director signature filters + * @hw: pointer to hardware structure + * @fdirctrl: value to write to flow director control register, initially + * contains just the value of the Rx packet buffer allocation + **/ +s32 ixgbe_init_fdir_signature_82599(struct ixgbe_hw *hw, u32 fdirctrl) +{ + /* + * Continue setup of fdirctrl register bits: + * Move the flexible bytes to use the ethertype - shift 6 words + * Set the maximum length per hash bucket to 0xA filters + * Send interrupt when 64 filters are left + */ + fdirctrl |= (0x6 << IXGBE_FDIRCTRL_FLEX_SHIFT) | + (0xA << IXGBE_FDIRCTRL_MAX_LENGTH_SHIFT) | + (4 << IXGBE_FDIRCTRL_FULL_THRESH_SHIFT); + + /* write hashes and fdirctrl register, poll for completion */ + ixgbe_fdir_enable_82599(hw, fdirctrl); + + return 0; +} + +/** + * ixgbe_init_fdir_perfect_82599 - Initialize Flow Director perfect filters + * @hw: pointer to hardware structure + * @fdirctrl: value to write to flow director control register, initially + * contains just the value of the Rx packet buffer allocation + **/ +s32 ixgbe_init_fdir_perfect_82599(struct ixgbe_hw *hw, u32 fdirctrl) +{ + /* + * Continue setup of fdirctrl register bits: + * Turn perfect match filtering on + * Report hash in RSS field of Rx wb descriptor + * Initialize the drop queue + * Move the flexible bytes to use the ethertype - shift 6 words + * Set the maximum length per hash bucket to 0xA filters + * Send interrupt when 64 (0x4 * 16) filters are left + */ + fdirctrl |= IXGBE_FDIRCTRL_PERFECT_MATCH | + IXGBE_FDIRCTRL_REPORT_STATUS | + (IXGBE_FDIR_DROP_QUEUE << IXGBE_FDIRCTRL_DROP_Q_SHIFT) | + (0x6 << IXGBE_FDIRCTRL_FLEX_SHIFT) | + (0xA << IXGBE_FDIRCTRL_MAX_LENGTH_SHIFT) | + (4 << IXGBE_FDIRCTRL_FULL_THRESH_SHIFT); + + /* write hashes and fdirctrl register, poll for completion */ + ixgbe_fdir_enable_82599(hw, fdirctrl); + + return 0; +} + +/* + * These defines allow us to quickly generate all of the necessary instructions + * in the function below by simply calling out IXGBE_COMPUTE_SIG_HASH_ITERATION + * for values 0 through 15 + */ +#define IXGBE_ATR_COMMON_HASH_KEY \ + (IXGBE_ATR_BUCKET_HASH_KEY & IXGBE_ATR_SIGNATURE_HASH_KEY) +#define IXGBE_COMPUTE_SIG_HASH_ITERATION(_n) \ +do { \ + u32 n = (_n); \ + if (IXGBE_ATR_COMMON_HASH_KEY & (0x01 << n)) \ + common_hash ^= lo_hash_dword >> n; \ + else if (IXGBE_ATR_BUCKET_HASH_KEY & (0x01 << n)) \ + bucket_hash ^= lo_hash_dword >> n; \ + else if (IXGBE_ATR_SIGNATURE_HASH_KEY & (0x01 << n)) \ + sig_hash ^= lo_hash_dword << (16 - n); \ + if (IXGBE_ATR_COMMON_HASH_KEY & (0x01 << (n + 16))) \ + common_hash ^= hi_hash_dword >> n; \ + else if (IXGBE_ATR_BUCKET_HASH_KEY & (0x01 << (n + 16))) \ + bucket_hash ^= hi_hash_dword >> n; \ + else if (IXGBE_ATR_SIGNATURE_HASH_KEY & (0x01 << (n + 16))) \ + sig_hash ^= hi_hash_dword << (16 - n); \ +} while (0); + +/** + * ixgbe_atr_compute_sig_hash_82599 - Compute the signature hash + * @stream: input bitstream to compute the hash on + * + * This function is almost identical to the function above but contains + * several optomizations such as unwinding all of the loops, letting the + * compiler work out all of the conditional ifs since the keys are static + * defines, and computing two keys at once since the hashed dword stream + * will be the same for both keys. + **/ +u32 ixgbe_atr_compute_sig_hash_82599(union ixgbe_atr_hash_dword input, + union ixgbe_atr_hash_dword common) +{ + u32 hi_hash_dword, lo_hash_dword, flow_vm_vlan; + u32 sig_hash = 0, bucket_hash = 0, common_hash = 0; + + /* record the flow_vm_vlan bits as they are a key part to the hash */ + flow_vm_vlan = IXGBE_NTOHL(input.dword); + + /* generate common hash dword */ + hi_hash_dword = IXGBE_NTOHL(common.dword); + + /* low dword is word swapped version of common */ + lo_hash_dword = (hi_hash_dword >> 16) | (hi_hash_dword << 16); + + /* apply flow ID/VM pool/VLAN ID bits to hash words */ + hi_hash_dword ^= flow_vm_vlan ^ (flow_vm_vlan >> 16); + + /* Process bits 0 and 16 */ + IXGBE_COMPUTE_SIG_HASH_ITERATION(0); + + /* + * apply flow ID/VM pool/VLAN ID bits to lo hash dword, we had to + * delay this because bit 0 of the stream should not be processed + * so we do not add the vlan until after bit 0 was processed + */ + lo_hash_dword ^= flow_vm_vlan ^ (flow_vm_vlan << 16); + + /* Process remaining 30 bit of the key */ + IXGBE_COMPUTE_SIG_HASH_ITERATION(1); + IXGBE_COMPUTE_SIG_HASH_ITERATION(2); + IXGBE_COMPUTE_SIG_HASH_ITERATION(3); + IXGBE_COMPUTE_SIG_HASH_ITERATION(4); + IXGBE_COMPUTE_SIG_HASH_ITERATION(5); + IXGBE_COMPUTE_SIG_HASH_ITERATION(6); + IXGBE_COMPUTE_SIG_HASH_ITERATION(7); + IXGBE_COMPUTE_SIG_HASH_ITERATION(8); + IXGBE_COMPUTE_SIG_HASH_ITERATION(9); + IXGBE_COMPUTE_SIG_HASH_ITERATION(10); + IXGBE_COMPUTE_SIG_HASH_ITERATION(11); + IXGBE_COMPUTE_SIG_HASH_ITERATION(12); + IXGBE_COMPUTE_SIG_HASH_ITERATION(13); + IXGBE_COMPUTE_SIG_HASH_ITERATION(14); + IXGBE_COMPUTE_SIG_HASH_ITERATION(15); + + /* combine common_hash result with signature and bucket hashes */ + bucket_hash ^= common_hash; + bucket_hash &= IXGBE_ATR_HASH_MASK; + + sig_hash ^= common_hash << 16; + sig_hash &= IXGBE_ATR_HASH_MASK << 16; + + /* return completed signature hash */ + return sig_hash ^ bucket_hash; +} + +/** + * ixgbe_atr_add_signature_filter_82599 - Adds a signature hash filter + * @hw: pointer to hardware structure + * @input: unique input dword + * @common: compressed common input dword + * @queue: queue index to direct traffic to + **/ +s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw, + union ixgbe_atr_hash_dword input, + union ixgbe_atr_hash_dword common, + u8 queue) +{ + u64 fdirhashcmd; + u32 fdircmd; + + /* + * Get the flow_type in order to program FDIRCMD properly + * lowest 2 bits are FDIRCMD.L4TYPE, third lowest bit is FDIRCMD.IPV6 + */ + switch (input.formatted.flow_type) { + case IXGBE_ATR_FLOW_TYPE_TCPV4: + case IXGBE_ATR_FLOW_TYPE_UDPV4: + case IXGBE_ATR_FLOW_TYPE_SCTPV4: + case IXGBE_ATR_FLOW_TYPE_TCPV6: + case IXGBE_ATR_FLOW_TYPE_UDPV6: + case IXGBE_ATR_FLOW_TYPE_SCTPV6: + break; + default: + hw_dbg(hw, " Error on flow type input\n"); + return IXGBE_ERR_CONFIG; + } + + /* configure FDIRCMD register */ + fdircmd = IXGBE_FDIRCMD_CMD_ADD_FLOW | IXGBE_FDIRCMD_FILTER_UPDATE | + IXGBE_FDIRCMD_LAST | IXGBE_FDIRCMD_QUEUE_EN; + fdircmd |= input.formatted.flow_type << IXGBE_FDIRCMD_FLOW_TYPE_SHIFT; + fdircmd |= (u32)queue << IXGBE_FDIRCMD_RX_QUEUE_SHIFT; + + /* + * The lower 32-bits of fdirhashcmd is for FDIRHASH, the upper 32-bits + * is for FDIRCMD. Then do a 64-bit register write from FDIRHASH. + */ + fdirhashcmd = (u64)fdircmd << 32; + fdirhashcmd |= ixgbe_atr_compute_sig_hash_82599(input, common); + IXGBE_WRITE_REG64(hw, IXGBE_FDIRHASH, fdirhashcmd); + + hw_dbg(hw, "Tx Queue=%x hash=%x\n", queue, (u32)fdirhashcmd); + + return 0; +} + +#define IXGBE_COMPUTE_BKT_HASH_ITERATION(_n) \ +do { \ + u32 n = (_n); \ + if (IXGBE_ATR_BUCKET_HASH_KEY & (0x01 << n)) \ + bucket_hash ^= lo_hash_dword >> n; \ + if (IXGBE_ATR_BUCKET_HASH_KEY & (0x01 << (n + 16))) \ + bucket_hash ^= hi_hash_dword >> n; \ +} while (0); + +/** + * ixgbe_atr_compute_perfect_hash_82599 - Compute the perfect filter hash + * @atr_input: input bitstream to compute the hash on + * @input_mask: mask for the input bitstream + * + * This function serves two main purposes. First it applys the input_mask + * to the atr_input resulting in a cleaned up atr_input data stream. + * Secondly it computes the hash and stores it in the bkt_hash field at + * the end of the input byte stream. This way it will be available for + * future use without needing to recompute the hash. + **/ +void ixgbe_atr_compute_perfect_hash_82599(union ixgbe_atr_input *input, + union ixgbe_atr_input *input_mask) +{ + + u32 hi_hash_dword, lo_hash_dword, flow_vm_vlan; + u32 bucket_hash = 0; + + /* Apply masks to input data */ + input->dword_stream[0] &= input_mask->dword_stream[0]; + input->dword_stream[1] &= input_mask->dword_stream[1]; + input->dword_stream[2] &= input_mask->dword_stream[2]; + input->dword_stream[3] &= input_mask->dword_stream[3]; + input->dword_stream[4] &= input_mask->dword_stream[4]; + input->dword_stream[5] &= input_mask->dword_stream[5]; + input->dword_stream[6] &= input_mask->dword_stream[6]; + input->dword_stream[7] &= input_mask->dword_stream[7]; + input->dword_stream[8] &= input_mask->dword_stream[8]; + input->dword_stream[9] &= input_mask->dword_stream[9]; + input->dword_stream[10] &= input_mask->dword_stream[10]; + + /* record the flow_vm_vlan bits as they are a key part to the hash */ + flow_vm_vlan = IXGBE_NTOHL(input->dword_stream[0]); + + /* generate common hash dword */ + hi_hash_dword = IXGBE_NTOHL(input->dword_stream[1] ^ + input->dword_stream[2] ^ + input->dword_stream[3] ^ + input->dword_stream[4] ^ + input->dword_stream[5] ^ + input->dword_stream[6] ^ + input->dword_stream[7] ^ + input->dword_stream[8] ^ + input->dword_stream[9] ^ + input->dword_stream[10]); + + /* low dword is word swapped version of common */ + lo_hash_dword = (hi_hash_dword >> 16) | (hi_hash_dword << 16); + + /* apply flow ID/VM pool/VLAN ID bits to hash words */ + hi_hash_dword ^= flow_vm_vlan ^ (flow_vm_vlan >> 16); + + /* Process bits 0 and 16 */ + IXGBE_COMPUTE_BKT_HASH_ITERATION(0); + + /* + * apply flow ID/VM pool/VLAN ID bits to lo hash dword, we had to + * delay this because bit 0 of the stream should not be processed + * so we do not add the vlan until after bit 0 was processed + */ + lo_hash_dword ^= flow_vm_vlan ^ (flow_vm_vlan << 16); + + /* Process remaining 30 bit of the key */ + IXGBE_COMPUTE_BKT_HASH_ITERATION(1); + IXGBE_COMPUTE_BKT_HASH_ITERATION(2); + IXGBE_COMPUTE_BKT_HASH_ITERATION(3); + IXGBE_COMPUTE_BKT_HASH_ITERATION(4); + IXGBE_COMPUTE_BKT_HASH_ITERATION(5); + IXGBE_COMPUTE_BKT_HASH_ITERATION(6); + IXGBE_COMPUTE_BKT_HASH_ITERATION(7); + IXGBE_COMPUTE_BKT_HASH_ITERATION(8); + IXGBE_COMPUTE_BKT_HASH_ITERATION(9); + IXGBE_COMPUTE_BKT_HASH_ITERATION(10); + IXGBE_COMPUTE_BKT_HASH_ITERATION(11); + IXGBE_COMPUTE_BKT_HASH_ITERATION(12); + IXGBE_COMPUTE_BKT_HASH_ITERATION(13); + IXGBE_COMPUTE_BKT_HASH_ITERATION(14); + IXGBE_COMPUTE_BKT_HASH_ITERATION(15); + + /* + * Limit hash to 13 bits since max bucket count is 8K. + * Store result at the end of the input stream. + */ + input->formatted.bkt_hash = bucket_hash & 0x1FFF; +} + +/** + * ixgbe_get_fdirtcpm_82599 - generate a tcp port from atr_input_masks + * @input_mask: mask to be bit swapped + * + * The source and destination port masks for flow director are bit swapped + * in that bit 15 effects bit 0, 14 effects 1, 13, 2 etc. In order to + * generate a correctly swapped value we need to bit swap the mask and that + * is what is accomplished by this function. + **/ +static u32 ixgbe_get_fdirtcpm_82599(union ixgbe_atr_input *input_mask) +{ + u32 mask = IXGBE_NTOHS(input_mask->formatted.dst_port); + mask <<= IXGBE_FDIRTCPM_DPORTM_SHIFT; + mask |= IXGBE_NTOHS(input_mask->formatted.src_port); + mask = ((mask & 0x55555555) << 1) | ((mask & 0xAAAAAAAA) >> 1); + mask = ((mask & 0x33333333) << 2) | ((mask & 0xCCCCCCCC) >> 2); + mask = ((mask & 0x0F0F0F0F) << 4) | ((mask & 0xF0F0F0F0) >> 4); + return ((mask & 0x00FF00FF) << 8) | ((mask & 0xFF00FF00) >> 8); +} + +/* + * These two macros are meant to address the fact that we have registers + * that are either all or in part big-endian. As a result on big-endian + * systems we will end up byte swapping the value to little-endian before + * it is byte swapped again and written to the hardware in the original + * big-endian format. + */ +#define IXGBE_STORE_AS_BE32(_value) \ + (((u32)(_value) >> 24) | (((u32)(_value) & 0x00FF0000) >> 8) | \ + (((u32)(_value) & 0x0000FF00) << 8) | ((u32)(_value) << 24)) + +#define IXGBE_WRITE_REG_BE32(a, reg, value) \ + IXGBE_WRITE_REG((a), (reg), IXGBE_STORE_AS_BE32(IXGBE_NTOHL(value))) + +#define IXGBE_STORE_AS_BE16(_value) \ + IXGBE_NTOHS(((u16)(_value) >> 8) | ((u16)(_value) << 8)) + +s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw, + union ixgbe_atr_input *input_mask) +{ + /* mask IPv6 since it is currently not supported */ + u32 fdirm = IXGBE_FDIRM_DIPv6; + u32 fdirtcpm; + + /* + * Program the relevant mask registers. If src/dst_port or src/dst_addr + * are zero, then assume a full mask for that field. Also assume that + * a VLAN of 0 is unspecified, so mask that out as well. L4type + * cannot be masked out in this implementation. + * + * This also assumes IPv4 only. IPv6 masking isn't supported at this + * point in time. + */ + + /* verify bucket hash is cleared on hash generation */ + if (input_mask->formatted.bkt_hash) + hw_dbg(hw, " bucket hash should always be 0 in mask\n"); + + /* Program FDIRM and verify partial masks */ + switch (input_mask->formatted.vm_pool & 0x7F) { + case 0x0: + fdirm |= IXGBE_FDIRM_POOL; + case 0x7F: + break; + default: + hw_dbg(hw, " Error on vm pool mask\n"); + return IXGBE_ERR_CONFIG; + } + + switch (input_mask->formatted.flow_type & IXGBE_ATR_L4TYPE_MASK) { + case 0x0: + fdirm |= IXGBE_FDIRM_L4P; + if (input_mask->formatted.dst_port || + input_mask->formatted.src_port) { + hw_dbg(hw, " Error on src/dst port mask\n"); + return IXGBE_ERR_CONFIG; + } + case IXGBE_ATR_L4TYPE_MASK: + break; + default: + hw_dbg(hw, " Error on flow type mask\n"); + return IXGBE_ERR_CONFIG; + } + + switch (IXGBE_NTOHS(input_mask->formatted.vlan_id) & 0xEFFF) { + case 0x0000: + /* mask VLAN ID, fall through to mask VLAN priority */ + fdirm |= IXGBE_FDIRM_VLANID; + case 0x0FFF: + /* mask VLAN priority */ + fdirm |= IXGBE_FDIRM_VLANP; + break; + case 0xE000: + /* mask VLAN ID only, fall through */ + fdirm |= IXGBE_FDIRM_VLANID; + case 0xEFFF: + /* no VLAN fields masked */ + break; + default: + hw_dbg(hw, " Error on VLAN mask\n"); + return IXGBE_ERR_CONFIG; + } + + switch (input_mask->formatted.flex_bytes & 0xFFFF) { + case 0x0000: + /* Mask Flex Bytes, fall through */ + fdirm |= IXGBE_FDIRM_FLEX; + case 0xFFFF: + break; + default: + hw_dbg(hw, " Error on flexible byte mask\n"); + return IXGBE_ERR_CONFIG; + } + + /* Now mask VM pool and destination IPv6 - bits 5 and 2 */ + IXGBE_WRITE_REG(hw, IXGBE_FDIRM, fdirm); + + /* store the TCP/UDP port masks, bit reversed from port layout */ + fdirtcpm = ixgbe_get_fdirtcpm_82599(input_mask); + + /* write both the same so that UDP and TCP use the same mask */ + IXGBE_WRITE_REG(hw, IXGBE_FDIRTCPM, ~fdirtcpm); + IXGBE_WRITE_REG(hw, IXGBE_FDIRUDPM, ~fdirtcpm); + + /* store source and destination IP masks (big-enian) */ + IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRSIP4M, + ~input_mask->formatted.src_ip[0]); + IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRDIP4M, + ~input_mask->formatted.dst_ip[0]); + + return 0; +} + +s32 ixgbe_fdir_write_perfect_filter_82599(struct ixgbe_hw *hw, + union ixgbe_atr_input *input, + u16 soft_id, u8 queue) +{ + u32 fdirport, fdirvlan, fdirhash, fdircmd; + + /* currently IPv6 is not supported, must be programmed with 0 */ + IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRSIPv6(0), + input->formatted.src_ip[0]); + IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRSIPv6(1), + input->formatted.src_ip[1]); + IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRSIPv6(2), + input->formatted.src_ip[2]); + + /* record the source address (big-endian) */ + IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRIPSA, input->formatted.src_ip[0]); + + /* record the first 32 bits of the destination address (big-endian) */ + IXGBE_WRITE_REG_BE32(hw, IXGBE_FDIRIPDA, input->formatted.dst_ip[0]); + + /* record source and destination port (little-endian)*/ + fdirport = IXGBE_NTOHS(input->formatted.dst_port); + fdirport <<= IXGBE_FDIRPORT_DESTINATION_SHIFT; + fdirport |= IXGBE_NTOHS(input->formatted.src_port); + IXGBE_WRITE_REG(hw, IXGBE_FDIRPORT, fdirport); + + /* record vlan (little-endian) and flex_bytes(big-endian) */ + fdirvlan = IXGBE_STORE_AS_BE16(input->formatted.flex_bytes); + fdirvlan <<= IXGBE_FDIRVLAN_FLEX_SHIFT; + fdirvlan |= IXGBE_NTOHS(input->formatted.vlan_id); + IXGBE_WRITE_REG(hw, IXGBE_FDIRVLAN, fdirvlan); + + /* configure FDIRHASH register */ + fdirhash = input->formatted.bkt_hash; + fdirhash |= soft_id << IXGBE_FDIRHASH_SIG_SW_INDEX_SHIFT; + IXGBE_WRITE_REG(hw, IXGBE_FDIRHASH, fdirhash); + + /* + * flush all previous writes to make certain registers are + * programmed prior to issuing the command + */ + IXGBE_WRITE_FLUSH(hw); + + /* configure FDIRCMD register */ + fdircmd = IXGBE_FDIRCMD_CMD_ADD_FLOW | IXGBE_FDIRCMD_FILTER_UPDATE | + IXGBE_FDIRCMD_LAST | IXGBE_FDIRCMD_QUEUE_EN; + if (queue == IXGBE_FDIR_DROP_QUEUE) + fdircmd |= IXGBE_FDIRCMD_DROP; + fdircmd |= input->formatted.flow_type << IXGBE_FDIRCMD_FLOW_TYPE_SHIFT; + fdircmd |= (u32)queue << IXGBE_FDIRCMD_RX_QUEUE_SHIFT; + fdircmd |= (u32)input->formatted.vm_pool << IXGBE_FDIRCMD_VT_POOL_SHIFT; + + IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD, fdircmd); + + return 0; +} + +s32 ixgbe_fdir_erase_perfect_filter_82599(struct ixgbe_hw *hw, + union ixgbe_atr_input *input, + u16 soft_id) +{ + u32 fdirhash; + u32 fdircmd = 0; + u32 retry_count; + s32 err = 0; + + /* configure FDIRHASH register */ + fdirhash = input->formatted.bkt_hash; + fdirhash |= soft_id << IXGBE_FDIRHASH_SIG_SW_INDEX_SHIFT; + IXGBE_WRITE_REG(hw, IXGBE_FDIRHASH, fdirhash); + + /* flush hash to HW */ + IXGBE_WRITE_FLUSH(hw); + + /* Query if filter is present */ + IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD, IXGBE_FDIRCMD_CMD_QUERY_REM_FILT); + + for (retry_count = 10; retry_count; retry_count--) { + /* allow 10us for query to process */ + udelay(10); + /* verify query completed successfully */ + fdircmd = IXGBE_READ_REG(hw, IXGBE_FDIRCMD); + if (!(fdircmd & IXGBE_FDIRCMD_CMD_MASK)) + break; + } + + if (!retry_count) + err = IXGBE_ERR_FDIR_REINIT_FAILED; + + /* if filter exists in hardware then remove it */ + if (fdircmd & IXGBE_FDIRCMD_FILTER_VALID) { + IXGBE_WRITE_REG(hw, IXGBE_FDIRHASH, fdirhash); + IXGBE_WRITE_FLUSH(hw); + IXGBE_WRITE_REG(hw, IXGBE_FDIRCMD, + IXGBE_FDIRCMD_CMD_REMOVE_FLOW); + } + + return err; +} + +/** + * ixgbe_fdir_add_perfect_filter_82599 - Adds a perfect filter + * @hw: pointer to hardware structure + * @input: input bitstream + * @input_mask: mask for the input bitstream + * @soft_id: software index for the filters + * @queue: queue index to direct traffic to + * + * Note that the caller to this function must lock before calling, since the + * hardware writes must be protected from one another. + **/ +s32 ixgbe_fdir_add_perfect_filter_82599(struct ixgbe_hw *hw, + union ixgbe_atr_input *input, + union ixgbe_atr_input *input_mask, + u16 soft_id, u8 queue) +{ + s32 err = IXGBE_ERR_CONFIG; + + /* + * Check flow_type formatting, and bail out before we touch the hardware + * if there's a configuration issue + */ + switch (input->formatted.flow_type) { + case IXGBE_ATR_FLOW_TYPE_IPV4: + input_mask->formatted.flow_type = IXGBE_ATR_L4TYPE_IPV6_MASK; + if (input->formatted.dst_port || input->formatted.src_port) { + hw_dbg(hw, " Error on src/dst port\n"); + return IXGBE_ERR_CONFIG; + } + break; + case IXGBE_ATR_FLOW_TYPE_SCTPV4: + if (input->formatted.dst_port || input->formatted.src_port) { + hw_dbg(hw, " Error on src/dst port\n"); + return IXGBE_ERR_CONFIG; + } + case IXGBE_ATR_FLOW_TYPE_TCPV4: + case IXGBE_ATR_FLOW_TYPE_UDPV4: + input_mask->formatted.flow_type = IXGBE_ATR_L4TYPE_IPV6_MASK | + IXGBE_ATR_L4TYPE_MASK; + break; + default: + hw_dbg(hw, " Error on flow type input\n"); + return err; + } + + /* program input mask into the HW */ + err = ixgbe_fdir_set_input_mask_82599(hw, input_mask); + if (err) + return err; + + /* apply mask and compute/store hash */ + ixgbe_atr_compute_perfect_hash_82599(input, input_mask); + + /* program filters to filter memory */ + return ixgbe_fdir_write_perfect_filter_82599(hw, input, + soft_id, queue); +} + +/** + * ixgbe_read_analog_reg8_82599 - Reads 8 bit Omer analog register + * @hw: pointer to hardware structure + * @reg: analog register to read + * @val: read value + * + * Performs read operation to Omer analog register specified. + **/ +s32 ixgbe_read_analog_reg8_82599(struct ixgbe_hw *hw, u32 reg, u8 *val) +{ + u32 core_ctl; + + IXGBE_WRITE_REG(hw, IXGBE_CORECTL, IXGBE_CORECTL_WRITE_CMD | + (reg << 8)); + IXGBE_WRITE_FLUSH(hw); + udelay(10); + core_ctl = IXGBE_READ_REG(hw, IXGBE_CORECTL); + *val = (u8)core_ctl; + + return 0; +} + +/** + * ixgbe_write_analog_reg8_82599 - Writes 8 bit Omer analog register + * @hw: pointer to hardware structure + * @reg: atlas register to write + * @val: value to write + * + * Performs write operation to Omer analog register specified. + **/ +s32 ixgbe_write_analog_reg8_82599(struct ixgbe_hw *hw, u32 reg, u8 val) +{ + u32 core_ctl; + + core_ctl = (reg << 8) | val; + IXGBE_WRITE_REG(hw, IXGBE_CORECTL, core_ctl); + IXGBE_WRITE_FLUSH(hw); + udelay(10); + + return 0; +} + +/** + * ixgbe_start_hw_82599 - Prepare hardware for Tx/Rx + * @hw: pointer to hardware structure + * + * Starts the hardware using the generic start_hw function + * and the generation start_hw function. + * Then performs revision-specific operations, if any. + **/ +s32 ixgbe_start_hw_82599(struct ixgbe_hw *hw) +{ + s32 ret_val = 0; + + ret_val = ixgbe_start_hw_generic(hw); + if (ret_val != 0) + goto out; + + ret_val = ixgbe_start_hw_gen2(hw); + if (ret_val != 0) + goto out; + + /* We need to run link autotry after the driver loads */ + hw->mac.autotry_restart = true; + + if (ret_val == 0) + ret_val = ixgbe_verify_fw_version_82599(hw); +out: + return ret_val; +} + +/** + * ixgbe_identify_phy_82599 - Get physical layer module + * @hw: pointer to hardware structure + * + * Determines the physical layer module found on the current adapter. + * If PHY already detected, maintains current PHY type in hw struct, + * otherwise executes the PHY detection routine. + **/ +s32 ixgbe_identify_phy_82599(struct ixgbe_hw *hw) +{ + s32 status = IXGBE_ERR_PHY_ADDR_INVALID; + + /* Detect PHY if not unknown - returns success if already detected. */ + status = ixgbe_identify_phy_generic(hw); + if (status != 0) { + /* 82599 10GBASE-T requires an external PHY */ + if (hw->mac.ops.get_media_type(hw) == ixgbe_media_type_copper) + goto out; + else + status = ixgbe_identify_module_generic(hw); + } + + /* Set PHY type none if no PHY detected */ + if (hw->phy.type == ixgbe_phy_unknown) { + hw->phy.type = ixgbe_phy_none; + status = 0; + } + + /* Return error if SFP module has been detected but is not supported */ + if (hw->phy.type == ixgbe_phy_sfp_unsupported) + status = IXGBE_ERR_SFP_NOT_SUPPORTED; + +out: + return status; +} + +/** + * ixgbe_get_supported_physical_layer_82599 - Returns physical layer type + * @hw: pointer to hardware structure + * + * Determines physical layer capabilities of the current configuration. + **/ +u32 ixgbe_get_supported_physical_layer_82599(struct ixgbe_hw *hw) +{ + u32 physical_layer = IXGBE_PHYSICAL_LAYER_UNKNOWN; + u32 autoc = IXGBE_READ_REG(hw, IXGBE_AUTOC); + u32 autoc2 = IXGBE_READ_REG(hw, IXGBE_AUTOC2); + u32 pma_pmd_10g_serial = autoc2 & IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_MASK; + u32 pma_pmd_10g_parallel = autoc & IXGBE_AUTOC_10G_PMA_PMD_MASK; + u32 pma_pmd_1g = autoc & IXGBE_AUTOC_1G_PMA_PMD_MASK; + u16 ext_ability = 0; + u8 comp_codes_10g = 0; + u8 comp_codes_1g = 0; + + hw->phy.ops.identify(hw); + + switch (hw->phy.type) { + case ixgbe_phy_tn: + case ixgbe_phy_cu_unknown: + hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_EXT_ABILITY, + IXGBE_MDIO_PMA_PMD_DEV_TYPE, &ext_ability); + if (ext_ability & IXGBE_MDIO_PHY_10GBASET_ABILITY) + physical_layer |= IXGBE_PHYSICAL_LAYER_10GBASE_T; + if (ext_ability & IXGBE_MDIO_PHY_1000BASET_ABILITY) + physical_layer |= IXGBE_PHYSICAL_LAYER_1000BASE_T; + if (ext_ability & IXGBE_MDIO_PHY_100BASETX_ABILITY) + physical_layer |= IXGBE_PHYSICAL_LAYER_100BASE_TX; + goto out; + default: + break; + } + + switch (autoc & IXGBE_AUTOC_LMS_MASK) { + case IXGBE_AUTOC_LMS_1G_AN: + case IXGBE_AUTOC_LMS_1G_LINK_NO_AN: + if (pma_pmd_1g == IXGBE_AUTOC_1G_KX_BX) { + physical_layer = IXGBE_PHYSICAL_LAYER_1000BASE_KX | + IXGBE_PHYSICAL_LAYER_1000BASE_BX; + goto out; + } else + /* SFI mode so read SFP module */ + goto sfp_check; + break; + case IXGBE_AUTOC_LMS_10G_LINK_NO_AN: + if (pma_pmd_10g_parallel == IXGBE_AUTOC_10G_CX4) + physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_CX4; + else if (pma_pmd_10g_parallel == IXGBE_AUTOC_10G_KX4) + physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_KX4; + else if (pma_pmd_10g_parallel == IXGBE_AUTOC_10G_XAUI) + physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_XAUI; + goto out; + break; + case IXGBE_AUTOC_LMS_10G_SERIAL: + if (pma_pmd_10g_serial == IXGBE_AUTOC2_10G_KR) { + physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_KR; + goto out; + } else if (pma_pmd_10g_serial == IXGBE_AUTOC2_10G_SFI) + goto sfp_check; + break; + case IXGBE_AUTOC_LMS_KX4_KX_KR: + case IXGBE_AUTOC_LMS_KX4_KX_KR_1G_AN: + if (autoc & IXGBE_AUTOC_KX_SUPP) + physical_layer |= IXGBE_PHYSICAL_LAYER_1000BASE_KX; + if (autoc & IXGBE_AUTOC_KX4_SUPP) + physical_layer |= IXGBE_PHYSICAL_LAYER_10GBASE_KX4; + if (autoc & IXGBE_AUTOC_KR_SUPP) + physical_layer |= IXGBE_PHYSICAL_LAYER_10GBASE_KR; + goto out; + break; + default: + goto out; + break; + } + +sfp_check: + /* SFP check must be done last since DA modules are sometimes used to + * test KR mode - we need to id KR mode correctly before SFP module. + * Call identify_sfp because the pluggable module may have changed */ + hw->phy.ops.identify_sfp(hw); + if (hw->phy.sfp_type == ixgbe_sfp_type_not_present) + goto out; + + switch (hw->phy.type) { + case ixgbe_phy_sfp_passive_tyco: + case ixgbe_phy_sfp_passive_unknown: + physical_layer = IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU; + break; + case ixgbe_phy_sfp_ftl_active: + case ixgbe_phy_sfp_active_unknown: + physical_layer = IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA; + break; + case ixgbe_phy_sfp_avago: + case ixgbe_phy_sfp_ftl: + case ixgbe_phy_sfp_intel: + case ixgbe_phy_sfp_unknown: + hw->phy.ops.read_i2c_eeprom(hw, + IXGBE_SFF_1GBE_COMP_CODES, &comp_codes_1g); + hw->phy.ops.read_i2c_eeprom(hw, + IXGBE_SFF_10GBE_COMP_CODES, &comp_codes_10g); + if (comp_codes_10g & IXGBE_SFF_10GBASESR_CAPABLE) + physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_SR; + else if (comp_codes_10g & IXGBE_SFF_10GBASELR_CAPABLE) + physical_layer = IXGBE_PHYSICAL_LAYER_10GBASE_LR; + else if (comp_codes_1g & IXGBE_SFF_1GBASET_CAPABLE) + physical_layer = IXGBE_PHYSICAL_LAYER_1000BASE_T; + else if (comp_codes_1g & IXGBE_SFF_1GBASESX_CAPABLE) + physical_layer = IXGBE_PHYSICAL_LAYER_1000BASE_SX; + break; + default: + break; + } + +out: + return physical_layer; +} + +/** + * ixgbe_enable_rx_dma_82599 - Enable the Rx DMA unit on 82599 + * @hw: pointer to hardware structure + * @regval: register value to write to RXCTRL + * + * Enables the Rx DMA unit for 82599 + **/ +s32 ixgbe_enable_rx_dma_82599(struct ixgbe_hw *hw, u32 regval) +{ + + /* + * Workaround for 82599 silicon errata when enabling the Rx datapath. + * If traffic is incoming before we enable the Rx unit, it could hang + * the Rx DMA unit. Therefore, make sure the security engine is + * completely disabled prior to enabling the Rx unit. + */ + + hw->mac.ops.disable_sec_rx_path(hw); + + IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, regval); + + hw->mac.ops.enable_sec_rx_path(hw); + + return 0; +} + +/** + * ixgbe_verify_fw_version_82599 - verify fw version for 82599 + * @hw: pointer to hardware structure + * + * Verifies that installed the firmware version is 0.6 or higher + * for SFI devices. All 82599 SFI devices should have version 0.6 or higher. + * + * Returns IXGBE_ERR_EEPROM_VERSION if the FW is not present or + * if the FW version is not supported. + **/ +static s32 ixgbe_verify_fw_version_82599(struct ixgbe_hw *hw) +{ + s32 status = IXGBE_ERR_EEPROM_VERSION; + u16 fw_offset, fw_ptp_cfg_offset; + u16 fw_version = 0; + + /* firmware check is only necessary for SFI devices */ + if (hw->phy.media_type != ixgbe_media_type_fiber) { + status = 0; + goto fw_version_out; + } + + /* get the offset to the Firmware Module block */ + hw->eeprom.ops.read(hw, IXGBE_FW_PTR, &fw_offset); + + if ((fw_offset == 0) || (fw_offset == 0xFFFF)) + goto fw_version_out; + + /* get the offset to the Pass Through Patch Configuration block */ + hw->eeprom.ops.read(hw, (fw_offset + + IXGBE_FW_PASSTHROUGH_PATCH_CONFIG_PTR), + &fw_ptp_cfg_offset); + + if ((fw_ptp_cfg_offset == 0) || (fw_ptp_cfg_offset == 0xFFFF)) + goto fw_version_out; + + /* get the firmware version */ + hw->eeprom.ops.read(hw, (fw_ptp_cfg_offset + + IXGBE_FW_PATCH_VERSION_4), &fw_version); + + if (fw_version > 0x5) + status = 0; + +fw_version_out: + return status; +} + +/** + * ixgbe_verify_lesm_fw_enabled_82599 - Checks LESM FW module state. + * @hw: pointer to hardware structure + * + * Returns true if the LESM FW module is present and enabled. Otherwise + * returns false. Smart Speed must be disabled if LESM FW module is enabled. + **/ +bool ixgbe_verify_lesm_fw_enabled_82599(struct ixgbe_hw *hw) +{ + bool lesm_enabled = false; + u16 fw_offset, fw_lesm_param_offset, fw_lesm_state; + s32 status; + + /* get the offset to the Firmware Module block */ + status = hw->eeprom.ops.read(hw, IXGBE_FW_PTR, &fw_offset); + + if ((status != 0) || + (fw_offset == 0) || (fw_offset == 0xFFFF)) + goto out; + + /* get the offset to the LESM Parameters block */ + status = hw->eeprom.ops.read(hw, (fw_offset + + IXGBE_FW_LESM_PARAMETERS_PTR), + &fw_lesm_param_offset); + + if ((status != 0) || + (fw_lesm_param_offset == 0) || (fw_lesm_param_offset == 0xFFFF)) + goto out; + + /* get the lesm state word */ + status = hw->eeprom.ops.read(hw, (fw_lesm_param_offset + + IXGBE_FW_LESM_STATE_1), + &fw_lesm_state); + + if ((status == 0) && + (fw_lesm_state & IXGBE_FW_LESM_STATE_ENABLED)) + lesm_enabled = true; + +out: + return lesm_enabled; +} + +/** + * ixgbe_read_eeprom_buffer_82599 - Read EEPROM word(s) using + * fastest available method + * + * @hw: pointer to hardware structure + * @offset: offset of word in EEPROM to read + * @words: number of words + * @data: word(s) read from the EEPROM + * + * Retrieves 16 bit word(s) read from EEPROM + **/ +static s32 ixgbe_read_eeprom_buffer_82599(struct ixgbe_hw *hw, u16 offset, + u16 words, u16 *data) +{ + struct ixgbe_eeprom_info *eeprom = &hw->eeprom; + s32 ret_val = IXGBE_ERR_CONFIG; + + /* + * If EEPROM is detected and can be addressed using 14 bits, + * use EERD otherwise use bit bang + */ + if ((eeprom->type == ixgbe_eeprom_spi) && + (offset + (words - 1) <= IXGBE_EERD_MAX_ADDR)) + ret_val = ixgbe_read_eerd_buffer_generic(hw, offset, words, + data); + else + ret_val = ixgbe_read_eeprom_buffer_bit_bang_generic(hw, offset, + words, + data); + + return ret_val; +} + +/** + * ixgbe_read_eeprom_82599 - Read EEPROM word using + * fastest available method + * + * @hw: pointer to hardware structure + * @offset: offset of word in the EEPROM to read + * @data: word read from the EEPROM + * + * Reads a 16 bit word from the EEPROM + **/ +static s32 ixgbe_read_eeprom_82599(struct ixgbe_hw *hw, + u16 offset, u16 *data) +{ + struct ixgbe_eeprom_info *eeprom = &hw->eeprom; + s32 ret_val = IXGBE_ERR_CONFIG; + + /* + * If EEPROM is detected and can be addressed using 14 bits, + * use EERD otherwise use bit bang + */ + if ((eeprom->type == ixgbe_eeprom_spi) && + (offset <= IXGBE_EERD_MAX_ADDR)) + ret_val = ixgbe_read_eerd_generic(hw, offset, data); + else + ret_val = ixgbe_read_eeprom_bit_bang_generic(hw, offset, data); + + return ret_val; +} + +/** + * ixgbe_read_i2c_byte_82599 - Reads 8 bit word over I2C + * @hw: pointer to hardware structure + * @byte_offset: byte offset to read + * @data: value read + * + * Performs byte read operation to SFP module's EEPROM over I2C interface at + * a specified device address. + **/ +static s32 ixgbe_read_i2c_byte_82599(struct ixgbe_hw *hw, u8 byte_offset, + u8 dev_addr, u8 *data) +{ + u32 esdp; + s32 status; + s32 timeout = 200; + + if (hw->phy.qsfp_shared_i2c_bus == TRUE) { + /* Acquire I2C bus ownership. */ + esdp = IXGBE_READ_REG(hw, IXGBE_ESDP); + esdp |= IXGBE_ESDP_SDP0; + IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp); + IXGBE_WRITE_FLUSH(hw); + + while (timeout) { + esdp = IXGBE_READ_REG(hw, IXGBE_ESDP); + if (esdp & IXGBE_ESDP_SDP1) + break; + + msleep(5); + timeout--; + } + + if (!timeout) { + hw_dbg(hw, "Driver can't access resource," + " acquiring I2C bus timeout.\n"); + status = IXGBE_ERR_I2C; + goto release_i2c_access; + } + } + + status = ixgbe_read_i2c_byte_generic(hw, byte_offset, dev_addr, data); + +release_i2c_access: + + if (hw->phy.qsfp_shared_i2c_bus == TRUE) { + /* Release I2C bus ownership. */ + esdp = IXGBE_READ_REG(hw, IXGBE_ESDP); + esdp &= ~IXGBE_ESDP_SDP0; + IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp); + IXGBE_WRITE_FLUSH(hw); + } + + return status; +} + +/** + * ixgbe_write_i2c_byte_82599 - Writes 8 bit word over I2C + * @hw: pointer to hardware structure + * @byte_offset: byte offset to write + * @data: value to write + * + * Performs byte write operation to SFP module's EEPROM over I2C interface at + * a specified device address. + **/ +static s32 ixgbe_write_i2c_byte_82599(struct ixgbe_hw *hw, u8 byte_offset, + u8 dev_addr, u8 data) +{ + u32 esdp; + s32 status; + s32 timeout = 200; + + if (hw->phy.qsfp_shared_i2c_bus == TRUE) { + /* Acquire I2C bus ownership. */ + esdp = IXGBE_READ_REG(hw, IXGBE_ESDP); + esdp |= IXGBE_ESDP_SDP0; + IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp); + IXGBE_WRITE_FLUSH(hw); + + while (timeout) { + esdp = IXGBE_READ_REG(hw, IXGBE_ESDP); + if (esdp & IXGBE_ESDP_SDP1) + break; + + msleep(5); + timeout--; + } + + if (!timeout) { + hw_dbg(hw, "Driver can't access resource," + " acquiring I2C bus timeout.\n"); + status = IXGBE_ERR_I2C; + goto release_i2c_access; + } + } + + status = ixgbe_write_i2c_byte_generic(hw, byte_offset, dev_addr, data); + +release_i2c_access: + + if (hw->phy.qsfp_shared_i2c_bus == TRUE) { + /* Release I2C bus ownership. */ + esdp = IXGBE_READ_REG(hw, IXGBE_ESDP); + esdp &= ~IXGBE_ESDP_SDP0; + IXGBE_WRITE_REG(hw, IXGBE_ESDP, esdp); + IXGBE_WRITE_FLUSH(hw); + } + + return status; +} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h new file mode 100644 index 00000000..02be92ab --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_82599.h @@ -0,0 +1,58 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _IXGBE_82599_H_ +#define _IXGBE_82599_H_ + +s32 ixgbe_get_link_capabilities_82599(struct ixgbe_hw *hw, + ixgbe_link_speed *speed, bool *autoneg); +enum ixgbe_media_type ixgbe_get_media_type_82599(struct ixgbe_hw *hw); +void ixgbe_disable_tx_laser_multispeed_fiber(struct ixgbe_hw *hw); +void ixgbe_enable_tx_laser_multispeed_fiber(struct ixgbe_hw *hw); +void ixgbe_flap_tx_laser_multispeed_fiber(struct ixgbe_hw *hw); +s32 ixgbe_setup_mac_link_multispeed_fiber(struct ixgbe_hw *hw, + ixgbe_link_speed speed, bool autoneg, + bool autoneg_wait_to_complete); +s32 ixgbe_setup_mac_link_smartspeed(struct ixgbe_hw *hw, + ixgbe_link_speed speed, bool autoneg, + bool autoneg_wait_to_complete); +s32 ixgbe_start_mac_link_82599(struct ixgbe_hw *hw, + bool autoneg_wait_to_complete); +s32 ixgbe_setup_mac_link_82599(struct ixgbe_hw *hw, ixgbe_link_speed speed, + bool autoneg, bool autoneg_wait_to_complete); +s32 ixgbe_setup_sfp_modules_82599(struct ixgbe_hw *hw); +void ixgbe_init_mac_link_ops_82599(struct ixgbe_hw *hw); +s32 ixgbe_reset_hw_82599(struct ixgbe_hw *hw); +s32 ixgbe_read_analog_reg8_82599(struct ixgbe_hw *hw, u32 reg, u8 *val); +s32 ixgbe_write_analog_reg8_82599(struct ixgbe_hw *hw, u32 reg, u8 val); +s32 ixgbe_start_hw_82599(struct ixgbe_hw *hw); +s32 ixgbe_identify_phy_82599(struct ixgbe_hw *hw); +s32 ixgbe_init_phy_ops_82599(struct ixgbe_hw *hw); +u32 ixgbe_get_supported_physical_layer_82599(struct ixgbe_hw *hw); +s32 ixgbe_enable_rx_dma_82599(struct ixgbe_hw *hw, u32 regval); +bool ixgbe_verify_lesm_fw_enabled_82599(struct ixgbe_hw *hw); +#endif /* _IXGBE_82599_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c new file mode 100644 index 00000000..ef7ce629 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.c @@ -0,0 +1,1157 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#include "ixgbe_api.h" +#include "ixgbe_common.h" + +/** + * ixgbe_init_shared_code - Initialize the shared code + * @hw: pointer to hardware structure + * + * This will assign function pointers and assign the MAC type and PHY code. + * Does not touch the hardware. This function must be called prior to any + * other function in the shared code. The ixgbe_hw structure should be + * memset to 0 prior to calling this function. The following fields in + * hw structure should be filled in prior to calling this function: + * hw_addr, back, device_id, vendor_id, subsystem_device_id, + * subsystem_vendor_id, and revision_id + **/ +s32 ixgbe_init_shared_code(struct ixgbe_hw *hw) +{ + s32 status; + + /* + * Set the mac type + */ + ixgbe_set_mac_type(hw); + + switch (hw->mac.type) { + case ixgbe_mac_82598EB: + status = ixgbe_init_ops_82598(hw); + break; + case ixgbe_mac_82599EB: + status = ixgbe_init_ops_82599(hw); + break; + case ixgbe_mac_X540: + status = ixgbe_init_ops_X540(hw); + break; + default: + status = IXGBE_ERR_DEVICE_NOT_SUPPORTED; + break; + } + + return status; +} + +/** + * ixgbe_set_mac_type - Sets MAC type + * @hw: pointer to the HW structure + * + * This function sets the mac type of the adapter based on the + * vendor ID and device ID stored in the hw structure. + **/ +s32 ixgbe_set_mac_type(struct ixgbe_hw *hw) +{ + s32 ret_val = 0; + + if (hw->vendor_id == IXGBE_INTEL_VENDOR_ID) { + switch (hw->device_id) { + case IXGBE_DEV_ID_82598: + case IXGBE_DEV_ID_82598_BX: + case IXGBE_DEV_ID_82598AF_SINGLE_PORT: + case IXGBE_DEV_ID_82598AF_DUAL_PORT: + case IXGBE_DEV_ID_82598AT: + case IXGBE_DEV_ID_82598AT2: + case IXGBE_DEV_ID_82598EB_CX4: + case IXGBE_DEV_ID_82598_CX4_DUAL_PORT: + case IXGBE_DEV_ID_82598_DA_DUAL_PORT: + case IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM: + case IXGBE_DEV_ID_82598EB_XF_LR: + case IXGBE_DEV_ID_82598EB_SFP_LOM: + hw->mac.type = ixgbe_mac_82598EB; + break; + case IXGBE_DEV_ID_82599_KX4: + case IXGBE_DEV_ID_82599_KX4_MEZZ: + case IXGBE_DEV_ID_82599_XAUI_LOM: + case IXGBE_DEV_ID_82599_COMBO_BACKPLANE: + case IXGBE_DEV_ID_82599_KR: + case IXGBE_DEV_ID_82599_SFP: + case IXGBE_DEV_ID_82599_BACKPLANE_FCOE: + case IXGBE_DEV_ID_82599_SFP_FCOE: + case IXGBE_DEV_ID_82599_SFP_EM: + case IXGBE_DEV_ID_82599_SFP_SF2: + case IXGBE_DEV_ID_82599_QSFP_SF_QP: + case IXGBE_DEV_ID_82599EN_SFP: + case IXGBE_DEV_ID_82599_CX4: + case IXGBE_DEV_ID_82599_LS: + case IXGBE_DEV_ID_82599_T3_LOM: + hw->mac.type = ixgbe_mac_82599EB; + break; + case IXGBE_DEV_ID_X540T: + hw->mac.type = ixgbe_mac_X540; + break; + default: + ret_val = IXGBE_ERR_DEVICE_NOT_SUPPORTED; + break; + } + } else { + ret_val = IXGBE_ERR_DEVICE_NOT_SUPPORTED; + } + + hw_dbg(hw, "ixgbe_set_mac_type found mac: %d, returns: %d\n", + hw->mac.type, ret_val); + return ret_val; +} + +/** + * ixgbe_init_hw - Initialize the hardware + * @hw: pointer to hardware structure + * + * Initialize the hardware by resetting and then starting the hardware + **/ +s32 ixgbe_init_hw(struct ixgbe_hw *hw) +{ + return ixgbe_call_func(hw, hw->mac.ops.init_hw, (hw), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_reset_hw - Performs a hardware reset + * @hw: pointer to hardware structure + * + * Resets the hardware by resetting the transmit and receive units, masks and + * clears all interrupts, performs a PHY reset, and performs a MAC reset + **/ +s32 ixgbe_reset_hw(struct ixgbe_hw *hw) +{ + return ixgbe_call_func(hw, hw->mac.ops.reset_hw, (hw), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_start_hw - Prepares hardware for Rx/Tx + * @hw: pointer to hardware structure + * + * Starts the hardware by filling the bus info structure and media type, + * clears all on chip counters, initializes receive address registers, + * multicast table, VLAN filter table, calls routine to setup link and + * flow control settings, and leaves transmit and receive units disabled + * and uninitialized. + **/ +s32 ixgbe_start_hw(struct ixgbe_hw *hw) +{ + return ixgbe_call_func(hw, hw->mac.ops.start_hw, (hw), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_clear_hw_cntrs - Clear hardware counters + * @hw: pointer to hardware structure + * + * Clears all hardware statistics counters by reading them from the hardware + * Statistics counters are clear on read. + **/ +s32 ixgbe_clear_hw_cntrs(struct ixgbe_hw *hw) +{ + return ixgbe_call_func(hw, hw->mac.ops.clear_hw_cntrs, (hw), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_get_media_type - Get media type + * @hw: pointer to hardware structure + * + * Returns the media type (fiber, copper, backplane) + **/ +enum ixgbe_media_type ixgbe_get_media_type(struct ixgbe_hw *hw) +{ + return ixgbe_call_func(hw, hw->mac.ops.get_media_type, (hw), + ixgbe_media_type_unknown); +} + +/** + * ixgbe_get_mac_addr - Get MAC address + * @hw: pointer to hardware structure + * @mac_addr: Adapter MAC address + * + * Reads the adapter's MAC address from the first Receive Address Register + * (RAR0) A reset of the adapter must have been performed prior to calling + * this function in order for the MAC address to have been loaded from the + * EEPROM into RAR0 + **/ +s32 ixgbe_get_mac_addr(struct ixgbe_hw *hw, u8 *mac_addr) +{ + return ixgbe_call_func(hw, hw->mac.ops.get_mac_addr, + (hw, mac_addr), IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_get_san_mac_addr - Get SAN MAC address + * @hw: pointer to hardware structure + * @san_mac_addr: SAN MAC address + * + * Reads the SAN MAC address from the EEPROM, if it's available. This is + * per-port, so set_lan_id() must be called before reading the addresses. + **/ +s32 ixgbe_get_san_mac_addr(struct ixgbe_hw *hw, u8 *san_mac_addr) +{ + return ixgbe_call_func(hw, hw->mac.ops.get_san_mac_addr, + (hw, san_mac_addr), IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_set_san_mac_addr - Write a SAN MAC address + * @hw: pointer to hardware structure + * @san_mac_addr: SAN MAC address + * + * Writes A SAN MAC address to the EEPROM. + **/ +s32 ixgbe_set_san_mac_addr(struct ixgbe_hw *hw, u8 *san_mac_addr) +{ + return ixgbe_call_func(hw, hw->mac.ops.set_san_mac_addr, + (hw, san_mac_addr), IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_get_device_caps - Get additional device capabilities + * @hw: pointer to hardware structure + * @device_caps: the EEPROM word for device capabilities + * + * Reads the extra device capabilities from the EEPROM + **/ +s32 ixgbe_get_device_caps(struct ixgbe_hw *hw, u16 *device_caps) +{ + return ixgbe_call_func(hw, hw->mac.ops.get_device_caps, + (hw, device_caps), IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_get_wwn_prefix - Get alternative WWNN/WWPN prefix from the EEPROM + * @hw: pointer to hardware structure + * @wwnn_prefix: the alternative WWNN prefix + * @wwpn_prefix: the alternative WWPN prefix + * + * This function will read the EEPROM from the alternative SAN MAC address + * block to check the support for the alternative WWNN/WWPN prefix support. + **/ +s32 ixgbe_get_wwn_prefix(struct ixgbe_hw *hw, u16 *wwnn_prefix, + u16 *wwpn_prefix) +{ + return ixgbe_call_func(hw, hw->mac.ops.get_wwn_prefix, + (hw, wwnn_prefix, wwpn_prefix), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_get_fcoe_boot_status - Get FCOE boot status from EEPROM + * @hw: pointer to hardware structure + * @bs: the fcoe boot status + * + * This function will read the FCOE boot status from the iSCSI FCOE block + **/ +s32 ixgbe_get_fcoe_boot_status(struct ixgbe_hw *hw, u16 *bs) +{ + return ixgbe_call_func(hw, hw->mac.ops.get_fcoe_boot_status, + (hw, bs), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_get_bus_info - Set PCI bus info + * @hw: pointer to hardware structure + * + * Sets the PCI bus info (speed, width, type) within the ixgbe_hw structure + **/ +s32 ixgbe_get_bus_info(struct ixgbe_hw *hw) +{ + return ixgbe_call_func(hw, hw->mac.ops.get_bus_info, (hw), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_get_num_of_tx_queues - Get Tx queues + * @hw: pointer to hardware structure + * + * Returns the number of transmit queues for the given adapter. + **/ +u32 ixgbe_get_num_of_tx_queues(struct ixgbe_hw *hw) +{ + return hw->mac.max_tx_queues; +} + +/** + * ixgbe_get_num_of_rx_queues - Get Rx queues + * @hw: pointer to hardware structure + * + * Returns the number of receive queues for the given adapter. + **/ +u32 ixgbe_get_num_of_rx_queues(struct ixgbe_hw *hw) +{ + return hw->mac.max_rx_queues; +} + +/** + * ixgbe_stop_adapter - Disable Rx/Tx units + * @hw: pointer to hardware structure + * + * Sets the adapter_stopped flag within ixgbe_hw struct. Clears interrupts, + * disables transmit and receive units. The adapter_stopped flag is used by + * the shared code and drivers to determine if the adapter is in a stopped + * state and should not touch the hardware. + **/ +s32 ixgbe_stop_adapter(struct ixgbe_hw *hw) +{ + return ixgbe_call_func(hw, hw->mac.ops.stop_adapter, (hw), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_read_pba_string - Reads part number string from EEPROM + * @hw: pointer to hardware structure + * @pba_num: stores the part number string from the EEPROM + * @pba_num_size: part number string buffer length + * + * Reads the part number string from the EEPROM. + **/ +s32 ixgbe_read_pba_string(struct ixgbe_hw *hw, u8 *pba_num, u32 pba_num_size) +{ + return ixgbe_read_pba_string_generic(hw, pba_num, pba_num_size); +} + +/** + * ixgbe_identify_phy - Get PHY type + * @hw: pointer to hardware structure + * + * Determines the physical layer module found on the current adapter. + **/ +s32 ixgbe_identify_phy(struct ixgbe_hw *hw) +{ + s32 status = 0; + + if (hw->phy.type == ixgbe_phy_unknown) { + status = ixgbe_call_func(hw, hw->phy.ops.identify, (hw), + IXGBE_NOT_IMPLEMENTED); + } + + return status; +} + +/** + * ixgbe_reset_phy - Perform a PHY reset + * @hw: pointer to hardware structure + **/ +s32 ixgbe_reset_phy(struct ixgbe_hw *hw) +{ + s32 status = 0; + + if (hw->phy.type == ixgbe_phy_unknown) { + if (ixgbe_identify_phy(hw) != 0) + status = IXGBE_ERR_PHY; + } + + if (status == 0) { + status = ixgbe_call_func(hw, hw->phy.ops.reset, (hw), + IXGBE_NOT_IMPLEMENTED); + } + return status; +} + +/** + * ixgbe_get_phy_firmware_version - + * @hw: pointer to hardware structure + * @firmware_version: pointer to firmware version + **/ +s32 ixgbe_get_phy_firmware_version(struct ixgbe_hw *hw, u16 *firmware_version) +{ + s32 status = 0; + + status = ixgbe_call_func(hw, hw->phy.ops.get_firmware_version, + (hw, firmware_version), + IXGBE_NOT_IMPLEMENTED); + return status; +} + +/** + * ixgbe_read_phy_reg - Read PHY register + * @hw: pointer to hardware structure + * @reg_addr: 32 bit address of PHY register to read + * @phy_data: Pointer to read data from PHY register + * + * Reads a value from a specified PHY register + **/ +s32 ixgbe_read_phy_reg(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type, + u16 *phy_data) +{ + if (hw->phy.id == 0) + ixgbe_identify_phy(hw); + + return ixgbe_call_func(hw, hw->phy.ops.read_reg, (hw, reg_addr, + device_type, phy_data), IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_write_phy_reg - Write PHY register + * @hw: pointer to hardware structure + * @reg_addr: 32 bit PHY register to write + * @phy_data: Data to write to the PHY register + * + * Writes a value to specified PHY register + **/ +s32 ixgbe_write_phy_reg(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type, + u16 phy_data) +{ + if (hw->phy.id == 0) + ixgbe_identify_phy(hw); + + return ixgbe_call_func(hw, hw->phy.ops.write_reg, (hw, reg_addr, + device_type, phy_data), IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_setup_phy_link - Restart PHY autoneg + * @hw: pointer to hardware structure + * + * Restart autonegotiation and PHY and waits for completion. + **/ +s32 ixgbe_setup_phy_link(struct ixgbe_hw *hw) +{ + return ixgbe_call_func(hw, hw->phy.ops.setup_link, (hw), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_check_phy_link - Determine link and speed status + * @hw: pointer to hardware structure + * + * Reads a PHY register to determine if link is up and the current speed for + * the PHY. + **/ +s32 ixgbe_check_phy_link(struct ixgbe_hw *hw, ixgbe_link_speed *speed, + bool *link_up) +{ + return ixgbe_call_func(hw, hw->phy.ops.check_link, (hw, speed, + link_up), IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_setup_phy_link_speed - Set auto advertise + * @hw: pointer to hardware structure + * @speed: new link speed + * @autoneg: true if autonegotiation enabled + * + * Sets the auto advertised capabilities + **/ +s32 ixgbe_setup_phy_link_speed(struct ixgbe_hw *hw, ixgbe_link_speed speed, + bool autoneg, + bool autoneg_wait_to_complete) +{ + return ixgbe_call_func(hw, hw->phy.ops.setup_link_speed, (hw, speed, + autoneg, autoneg_wait_to_complete), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_check_link - Get link and speed status + * @hw: pointer to hardware structure + * + * Reads the links register to determine if link is up and the current speed + **/ +s32 ixgbe_check_link(struct ixgbe_hw *hw, ixgbe_link_speed *speed, + bool *link_up, bool link_up_wait_to_complete) +{ + return ixgbe_call_func(hw, hw->mac.ops.check_link, (hw, speed, + link_up, link_up_wait_to_complete), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_disable_tx_laser - Disable Tx laser + * @hw: pointer to hardware structure + * + * If the driver needs to disable the laser on SFI optics. + **/ +void ixgbe_disable_tx_laser(struct ixgbe_hw *hw) +{ + if (hw->mac.ops.disable_tx_laser) + hw->mac.ops.disable_tx_laser(hw); +} + +/** + * ixgbe_enable_tx_laser - Enable Tx laser + * @hw: pointer to hardware structure + * + * If the driver needs to enable the laser on SFI optics. + **/ +void ixgbe_enable_tx_laser(struct ixgbe_hw *hw) +{ + if (hw->mac.ops.enable_tx_laser) + hw->mac.ops.enable_tx_laser(hw); +} + +/** + * ixgbe_flap_tx_laser - flap Tx laser to start autotry process + * @hw: pointer to hardware structure + * + * When the driver changes the link speeds that it can support then + * flap the tx laser to alert the link partner to start autotry + * process on its end. + **/ +void ixgbe_flap_tx_laser(struct ixgbe_hw *hw) +{ + if (hw->mac.ops.flap_tx_laser) + hw->mac.ops.flap_tx_laser(hw); +} + +/** + * ixgbe_setup_link - Set link speed + * @hw: pointer to hardware structure + * @speed: new link speed + * @autoneg: true if autonegotiation enabled + * + * Configures link settings. Restarts the link. + * Performs autonegotiation if needed. + **/ +s32 ixgbe_setup_link(struct ixgbe_hw *hw, ixgbe_link_speed speed, + bool autoneg, + bool autoneg_wait_to_complete) +{ + return ixgbe_call_func(hw, hw->mac.ops.setup_link, (hw, speed, + autoneg, autoneg_wait_to_complete), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_get_link_capabilities - Returns link capabilities + * @hw: pointer to hardware structure + * + * Determines the link capabilities of the current configuration. + **/ +s32 ixgbe_get_link_capabilities(struct ixgbe_hw *hw, ixgbe_link_speed *speed, + bool *autoneg) +{ + return ixgbe_call_func(hw, hw->mac.ops.get_link_capabilities, (hw, + speed, autoneg), IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_led_on - Turn on LEDs + * @hw: pointer to hardware structure + * @index: led number to turn on + * + * Turns on the software controllable LEDs. + **/ +s32 ixgbe_led_on(struct ixgbe_hw *hw, u32 index) +{ + return ixgbe_call_func(hw, hw->mac.ops.led_on, (hw, index), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_led_off - Turn off LEDs + * @hw: pointer to hardware structure + * @index: led number to turn off + * + * Turns off the software controllable LEDs. + **/ +s32 ixgbe_led_off(struct ixgbe_hw *hw, u32 index) +{ + return ixgbe_call_func(hw, hw->mac.ops.led_off, (hw, index), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_blink_led_start - Blink LEDs + * @hw: pointer to hardware structure + * @index: led number to blink + * + * Blink LED based on index. + **/ +s32 ixgbe_blink_led_start(struct ixgbe_hw *hw, u32 index) +{ + return ixgbe_call_func(hw, hw->mac.ops.blink_led_start, (hw, index), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_blink_led_stop - Stop blinking LEDs + * @hw: pointer to hardware structure + * + * Stop blinking LED based on index. + **/ +s32 ixgbe_blink_led_stop(struct ixgbe_hw *hw, u32 index) +{ + return ixgbe_call_func(hw, hw->mac.ops.blink_led_stop, (hw, index), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_init_eeprom_params - Initialize EEPROM parameters + * @hw: pointer to hardware structure + * + * Initializes the EEPROM parameters ixgbe_eeprom_info within the + * ixgbe_hw struct in order to set up EEPROM access. + **/ +s32 ixgbe_init_eeprom_params(struct ixgbe_hw *hw) +{ + return ixgbe_call_func(hw, hw->eeprom.ops.init_params, (hw), + IXGBE_NOT_IMPLEMENTED); +} + + +/** + * ixgbe_write_eeprom - Write word to EEPROM + * @hw: pointer to hardware structure + * @offset: offset within the EEPROM to be written to + * @data: 16 bit word to be written to the EEPROM + * + * Writes 16 bit value to EEPROM. If ixgbe_eeprom_update_checksum is not + * called after this function, the EEPROM will most likely contain an + * invalid checksum. + **/ +s32 ixgbe_write_eeprom(struct ixgbe_hw *hw, u16 offset, u16 data) +{ + return ixgbe_call_func(hw, hw->eeprom.ops.write, (hw, offset, data), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_write_eeprom_buffer - Write word(s) to EEPROM + * @hw: pointer to hardware structure + * @offset: offset within the EEPROM to be written to + * @data: 16 bit word(s) to be written to the EEPROM + * @words: number of words + * + * Writes 16 bit word(s) to EEPROM. If ixgbe_eeprom_update_checksum is not + * called after this function, the EEPROM will most likely contain an + * invalid checksum. + **/ +s32 ixgbe_write_eeprom_buffer(struct ixgbe_hw *hw, u16 offset, u16 words, + u16 *data) +{ + return ixgbe_call_func(hw, hw->eeprom.ops.write_buffer, + (hw, offset, words, data), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_read_eeprom - Read word from EEPROM + * @hw: pointer to hardware structure + * @offset: offset within the EEPROM to be read + * @data: read 16 bit value from EEPROM + * + * Reads 16 bit value from EEPROM + **/ +s32 ixgbe_read_eeprom(struct ixgbe_hw *hw, u16 offset, u16 *data) +{ + return ixgbe_call_func(hw, hw->eeprom.ops.read, (hw, offset, data), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_read_eeprom_buffer - Read word(s) from EEPROM + * @hw: pointer to hardware structure + * @offset: offset within the EEPROM to be read + * @data: read 16 bit word(s) from EEPROM + * @words: number of words + * + * Reads 16 bit word(s) from EEPROM + **/ +s32 ixgbe_read_eeprom_buffer(struct ixgbe_hw *hw, u16 offset, + u16 words, u16 *data) +{ + return ixgbe_call_func(hw, hw->eeprom.ops.read_buffer, + (hw, offset, words, data), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_validate_eeprom_checksum - Validate EEPROM checksum + * @hw: pointer to hardware structure + * @checksum_val: calculated checksum + * + * Performs checksum calculation and validates the EEPROM checksum + **/ +s32 ixgbe_validate_eeprom_checksum(struct ixgbe_hw *hw, u16 *checksum_val) +{ + return ixgbe_call_func(hw, hw->eeprom.ops.validate_checksum, + (hw, checksum_val), IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_eeprom_update_checksum - Updates the EEPROM checksum + * @hw: pointer to hardware structure + **/ +s32 ixgbe_update_eeprom_checksum(struct ixgbe_hw *hw) +{ + return ixgbe_call_func(hw, hw->eeprom.ops.update_checksum, (hw), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_insert_mac_addr - Find a RAR for this mac address + * @hw: pointer to hardware structure + * @addr: Address to put into receive address register + * @vmdq: VMDq pool to assign + * + * Puts an ethernet address into a receive address register, or + * finds the rar that it is aleady in; adds to the pool list + **/ +s32 ixgbe_insert_mac_addr(struct ixgbe_hw *hw, u8 *addr, u32 vmdq) +{ + return ixgbe_call_func(hw, hw->mac.ops.insert_mac_addr, + (hw, addr, vmdq), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_set_rar - Set Rx address register + * @hw: pointer to hardware structure + * @index: Receive address register to write + * @addr: Address to put into receive address register + * @vmdq: VMDq "set" + * @enable_addr: set flag that address is active + * + * Puts an ethernet address into a receive address register. + **/ +s32 ixgbe_set_rar(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq, + u32 enable_addr) +{ + return ixgbe_call_func(hw, hw->mac.ops.set_rar, (hw, index, addr, vmdq, + enable_addr), IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_clear_rar - Clear Rx address register + * @hw: pointer to hardware structure + * @index: Receive address register to write + * + * Puts an ethernet address into a receive address register. + **/ +s32 ixgbe_clear_rar(struct ixgbe_hw *hw, u32 index) +{ + return ixgbe_call_func(hw, hw->mac.ops.clear_rar, (hw, index), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_set_vmdq - Associate a VMDq index with a receive address + * @hw: pointer to hardware structure + * @rar: receive address register index to associate with VMDq index + * @vmdq: VMDq set or pool index + **/ +s32 ixgbe_set_vmdq(struct ixgbe_hw *hw, u32 rar, u32 vmdq) +{ + return ixgbe_call_func(hw, hw->mac.ops.set_vmdq, (hw, rar, vmdq), + IXGBE_NOT_IMPLEMENTED); + +} + +/** + * ixgbe_set_vmdq_san_mac - Associate VMDq index 127 with a receive address + * @hw: pointer to hardware structure + * @vmdq: VMDq default pool index + **/ +s32 ixgbe_set_vmdq_san_mac(struct ixgbe_hw *hw, u32 vmdq) +{ + return ixgbe_call_func(hw, hw->mac.ops.set_vmdq_san_mac, + (hw, vmdq), IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_clear_vmdq - Disassociate a VMDq index from a receive address + * @hw: pointer to hardware structure + * @rar: receive address register index to disassociate with VMDq index + * @vmdq: VMDq set or pool index + **/ +s32 ixgbe_clear_vmdq(struct ixgbe_hw *hw, u32 rar, u32 vmdq) +{ + return ixgbe_call_func(hw, hw->mac.ops.clear_vmdq, (hw, rar, vmdq), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_init_rx_addrs - Initializes receive address filters. + * @hw: pointer to hardware structure + * + * Places the MAC address in receive address register 0 and clears the rest + * of the receive address registers. Clears the multicast table. Assumes + * the receiver is in reset when the routine is called. + **/ +s32 ixgbe_init_rx_addrs(struct ixgbe_hw *hw) +{ + return ixgbe_call_func(hw, hw->mac.ops.init_rx_addrs, (hw), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_get_num_rx_addrs - Returns the number of RAR entries. + * @hw: pointer to hardware structure + **/ +u32 ixgbe_get_num_rx_addrs(struct ixgbe_hw *hw) +{ + return hw->mac.num_rar_entries; +} + +/** + * ixgbe_update_uc_addr_list - Updates the MAC's list of secondary addresses + * @hw: pointer to hardware structure + * @addr_list: the list of new multicast addresses + * @addr_count: number of addresses + * @func: iterator function to walk the multicast address list + * + * The given list replaces any existing list. Clears the secondary addrs from + * receive address registers. Uses unused receive address registers for the + * first secondary addresses, and falls back to promiscuous mode as needed. + **/ +s32 ixgbe_update_uc_addr_list(struct ixgbe_hw *hw, u8 *addr_list, + u32 addr_count, ixgbe_mc_addr_itr func) +{ + return ixgbe_call_func(hw, hw->mac.ops.update_uc_addr_list, (hw, + addr_list, addr_count, func), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_update_mc_addr_list - Updates the MAC's list of multicast addresses + * @hw: pointer to hardware structure + * @mc_addr_list: the list of new multicast addresses + * @mc_addr_count: number of addresses + * @func: iterator function to walk the multicast address list + * + * The given list replaces any existing list. Clears the MC addrs from receive + * address registers and the multicast table. Uses unused receive address + * registers for the first multicast addresses, and hashes the rest into the + * multicast table. + **/ +s32 ixgbe_update_mc_addr_list(struct ixgbe_hw *hw, u8 *mc_addr_list, + u32 mc_addr_count, ixgbe_mc_addr_itr func, + bool clear) +{ + return ixgbe_call_func(hw, hw->mac.ops.update_mc_addr_list, (hw, + mc_addr_list, mc_addr_count, func, clear), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_enable_mc - Enable multicast address in RAR + * @hw: pointer to hardware structure + * + * Enables multicast address in RAR and the use of the multicast hash table. + **/ +s32 ixgbe_enable_mc(struct ixgbe_hw *hw) +{ + return ixgbe_call_func(hw, hw->mac.ops.enable_mc, (hw), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_disable_mc - Disable multicast address in RAR + * @hw: pointer to hardware structure + * + * Disables multicast address in RAR and the use of the multicast hash table. + **/ +s32 ixgbe_disable_mc(struct ixgbe_hw *hw) +{ + return ixgbe_call_func(hw, hw->mac.ops.disable_mc, (hw), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_clear_vfta - Clear VLAN filter table + * @hw: pointer to hardware structure + * + * Clears the VLAN filer table, and the VMDq index associated with the filter + **/ +s32 ixgbe_clear_vfta(struct ixgbe_hw *hw) +{ + return ixgbe_call_func(hw, hw->mac.ops.clear_vfta, (hw), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_set_vfta - Set VLAN filter table + * @hw: pointer to hardware structure + * @vlan: VLAN id to write to VLAN filter + * @vind: VMDq output index that maps queue to VLAN id in VFTA + * @vlan_on: boolean flag to turn on/off VLAN in VFTA + * + * Turn on/off specified VLAN in the VLAN filter table. + **/ +s32 ixgbe_set_vfta(struct ixgbe_hw *hw, u32 vlan, u32 vind, bool vlan_on) +{ + return ixgbe_call_func(hw, hw->mac.ops.set_vfta, (hw, vlan, vind, + vlan_on), IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_set_vlvf - Set VLAN Pool Filter + * @hw: pointer to hardware structure + * @vlan: VLAN id to write to VLAN filter + * @vind: VMDq output index that maps queue to VLAN id in VFVFB + * @vlan_on: boolean flag to turn on/off VLAN in VFVF + * @vfta_changed: pointer to boolean flag which indicates whether VFTA + * should be changed + * + * Turn on/off specified bit in VLVF table. + **/ +s32 ixgbe_set_vlvf(struct ixgbe_hw *hw, u32 vlan, u32 vind, bool vlan_on, + bool *vfta_changed) +{ + return ixgbe_call_func(hw, hw->mac.ops.set_vlvf, (hw, vlan, vind, + vlan_on, vfta_changed), IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_fc_enable - Enable flow control + * @hw: pointer to hardware structure + * + * Configures the flow control settings based on SW configuration. + **/ +s32 ixgbe_fc_enable(struct ixgbe_hw *hw) +{ + return ixgbe_call_func(hw, hw->mac.ops.fc_enable, (hw), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_set_fw_drv_ver - Try to send the driver version number FW + * @hw: pointer to hardware structure + * @maj: driver major number to be sent to firmware + * @min: driver minor number to be sent to firmware + * @build: driver build number to be sent to firmware + * @ver: driver version number to be sent to firmware + **/ +s32 ixgbe_set_fw_drv_ver(struct ixgbe_hw *hw, u8 maj, u8 min, u8 build, + u8 ver) +{ + return ixgbe_call_func(hw, hw->mac.ops.set_fw_drv_ver, (hw, maj, min, + build, ver), IXGBE_NOT_IMPLEMENTED); +} + + +/** + * ixgbe_get_thermal_sensor_data - Gathers thermal sensor data + * @hw: pointer to hardware structure + * + * Updates the temperatures in mac.thermal_sensor_data + **/ +s32 ixgbe_get_thermal_sensor_data(struct ixgbe_hw *hw) +{ + return ixgbe_call_func(hw, hw->mac.ops.get_thermal_sensor_data, (hw), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_init_thermal_sensor_thresh - Inits thermal sensor thresholds + * @hw: pointer to hardware structure + * + * Inits the thermal sensor thresholds according to the NVM map + **/ +s32 ixgbe_init_thermal_sensor_thresh(struct ixgbe_hw *hw) +{ + return ixgbe_call_func(hw, hw->mac.ops.init_thermal_sensor_thresh, (hw), + IXGBE_NOT_IMPLEMENTED); +} +/** + * ixgbe_read_analog_reg8 - Reads 8 bit analog register + * @hw: pointer to hardware structure + * @reg: analog register to read + * @val: read value + * + * Performs write operation to analog register specified. + **/ +s32 ixgbe_read_analog_reg8(struct ixgbe_hw *hw, u32 reg, u8 *val) +{ + return ixgbe_call_func(hw, hw->mac.ops.read_analog_reg8, (hw, reg, + val), IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_write_analog_reg8 - Writes 8 bit analog register + * @hw: pointer to hardware structure + * @reg: analog register to write + * @val: value to write + * + * Performs write operation to Atlas analog register specified. + **/ +s32 ixgbe_write_analog_reg8(struct ixgbe_hw *hw, u32 reg, u8 val) +{ + return ixgbe_call_func(hw, hw->mac.ops.write_analog_reg8, (hw, reg, + val), IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_init_uta_tables - Initializes Unicast Table Arrays. + * @hw: pointer to hardware structure + * + * Initializes the Unicast Table Arrays to zero on device load. This + * is part of the Rx init addr execution path. + **/ +s32 ixgbe_init_uta_tables(struct ixgbe_hw *hw) +{ + return ixgbe_call_func(hw, hw->mac.ops.init_uta_tables, (hw), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_read_i2c_byte - Reads 8 bit word over I2C at specified device address + * @hw: pointer to hardware structure + * @byte_offset: byte offset to read + * @data: value read + * + * Performs byte read operation to SFP module's EEPROM over I2C interface. + **/ +s32 ixgbe_read_i2c_byte(struct ixgbe_hw *hw, u8 byte_offset, u8 dev_addr, + u8 *data) +{ + return ixgbe_call_func(hw, hw->phy.ops.read_i2c_byte, (hw, byte_offset, + dev_addr, data), IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_write_i2c_byte - Writes 8 bit word over I2C + * @hw: pointer to hardware structure + * @byte_offset: byte offset to write + * @data: value to write + * + * Performs byte write operation to SFP module's EEPROM over I2C interface + * at a specified device address. + **/ +s32 ixgbe_write_i2c_byte(struct ixgbe_hw *hw, u8 byte_offset, u8 dev_addr, + u8 data) +{ + return ixgbe_call_func(hw, hw->phy.ops.write_i2c_byte, (hw, byte_offset, + dev_addr, data), IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_write_i2c_eeprom - Writes 8 bit EEPROM word over I2C interface + * @hw: pointer to hardware structure + * @byte_offset: EEPROM byte offset to write + * @eeprom_data: value to write + * + * Performs byte write operation to SFP module's EEPROM over I2C interface. + **/ +s32 ixgbe_write_i2c_eeprom(struct ixgbe_hw *hw, + u8 byte_offset, u8 eeprom_data) +{ + return ixgbe_call_func(hw, hw->phy.ops.write_i2c_eeprom, + (hw, byte_offset, eeprom_data), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_read_i2c_eeprom - Reads 8 bit EEPROM word over I2C interface + * @hw: pointer to hardware structure + * @byte_offset: EEPROM byte offset to read + * @eeprom_data: value read + * + * Performs byte read operation to SFP module's EEPROM over I2C interface. + **/ +s32 ixgbe_read_i2c_eeprom(struct ixgbe_hw *hw, u8 byte_offset, u8 *eeprom_data) +{ + return ixgbe_call_func(hw, hw->phy.ops.read_i2c_eeprom, + (hw, byte_offset, eeprom_data), + IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_get_supported_physical_layer - Returns physical layer type + * @hw: pointer to hardware structure + * + * Determines physical layer capabilities of the current configuration. + **/ +u32 ixgbe_get_supported_physical_layer(struct ixgbe_hw *hw) +{ + return ixgbe_call_func(hw, hw->mac.ops.get_supported_physical_layer, + (hw), IXGBE_PHYSICAL_LAYER_UNKNOWN); +} + +/** + * ixgbe_enable_rx_dma - Enables Rx DMA unit, dependent on device specifics + * @hw: pointer to hardware structure + * @regval: bitfield to write to the Rx DMA register + * + * Enables the Rx DMA unit of the device. + **/ +s32 ixgbe_enable_rx_dma(struct ixgbe_hw *hw, u32 regval) +{ + return ixgbe_call_func(hw, hw->mac.ops.enable_rx_dma, + (hw, regval), IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_disable_sec_rx_path - Stops the receive data path + * @hw: pointer to hardware structure + * + * Stops the receive data path. + **/ +s32 ixgbe_disable_sec_rx_path(struct ixgbe_hw *hw) +{ + return ixgbe_call_func(hw, hw->mac.ops.disable_sec_rx_path, + (hw), IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_enable_sec_rx_path - Enables the receive data path + * @hw: pointer to hardware structure + * + * Enables the receive data path. + **/ +s32 ixgbe_enable_sec_rx_path(struct ixgbe_hw *hw) +{ + return ixgbe_call_func(hw, hw->mac.ops.enable_sec_rx_path, + (hw), IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_acquire_swfw_semaphore - Acquire SWFW semaphore + * @hw: pointer to hardware structure + * @mask: Mask to specify which semaphore to acquire + * + * Acquires the SWFW semaphore through SW_FW_SYNC register for the specified + * function (CSR, PHY0, PHY1, EEPROM, Flash) + **/ +s32 ixgbe_acquire_swfw_semaphore(struct ixgbe_hw *hw, u16 mask) +{ + return ixgbe_call_func(hw, hw->mac.ops.acquire_swfw_sync, + (hw, mask), IXGBE_NOT_IMPLEMENTED); +} + +/** + * ixgbe_release_swfw_semaphore - Release SWFW semaphore + * @hw: pointer to hardware structure + * @mask: Mask to specify which semaphore to release + * + * Releases the SWFW semaphore through SW_FW_SYNC register for the specified + * function (CSR, PHY0, PHY1, EEPROM, Flash) + **/ +void ixgbe_release_swfw_semaphore(struct ixgbe_hw *hw, u16 mask) +{ + if (hw->mac.ops.release_swfw_sync) + hw->mac.ops.release_swfw_sync(hw, mask); +} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.h new file mode 100644 index 00000000..a6ab30d2 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_api.h @@ -0,0 +1,168 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _IXGBE_API_H_ +#define _IXGBE_API_H_ + +#include "ixgbe_type.h" + +s32 ixgbe_init_shared_code(struct ixgbe_hw *hw); + +extern s32 ixgbe_init_ops_82598(struct ixgbe_hw *hw); +extern s32 ixgbe_init_ops_82599(struct ixgbe_hw *hw); +extern s32 ixgbe_init_ops_X540(struct ixgbe_hw *hw); + +s32 ixgbe_set_mac_type(struct ixgbe_hw *hw); +s32 ixgbe_init_hw(struct ixgbe_hw *hw); +s32 ixgbe_reset_hw(struct ixgbe_hw *hw); +s32 ixgbe_start_hw(struct ixgbe_hw *hw); +s32 ixgbe_clear_hw_cntrs(struct ixgbe_hw *hw); +enum ixgbe_media_type ixgbe_get_media_type(struct ixgbe_hw *hw); +s32 ixgbe_get_mac_addr(struct ixgbe_hw *hw, u8 *mac_addr); +s32 ixgbe_get_bus_info(struct ixgbe_hw *hw); +u32 ixgbe_get_num_of_tx_queues(struct ixgbe_hw *hw); +u32 ixgbe_get_num_of_rx_queues(struct ixgbe_hw *hw); +s32 ixgbe_stop_adapter(struct ixgbe_hw *hw); +s32 ixgbe_read_pba_string(struct ixgbe_hw *hw, u8 *pba_num, u32 pba_num_size); + +s32 ixgbe_identify_phy(struct ixgbe_hw *hw); +s32 ixgbe_reset_phy(struct ixgbe_hw *hw); +s32 ixgbe_read_phy_reg(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type, + u16 *phy_data); +s32 ixgbe_write_phy_reg(struct ixgbe_hw *hw, u32 reg_addr, u32 device_type, + u16 phy_data); + +s32 ixgbe_setup_phy_link(struct ixgbe_hw *hw); +s32 ixgbe_check_phy_link(struct ixgbe_hw *hw, + ixgbe_link_speed *speed, + bool *link_up); +s32 ixgbe_setup_phy_link_speed(struct ixgbe_hw *hw, + ixgbe_link_speed speed, + bool autoneg, + bool autoneg_wait_to_complete); +void ixgbe_disable_tx_laser(struct ixgbe_hw *hw); +void ixgbe_enable_tx_laser(struct ixgbe_hw *hw); +void ixgbe_flap_tx_laser(struct ixgbe_hw *hw); +s32 ixgbe_setup_link(struct ixgbe_hw *hw, ixgbe_link_speed speed, + bool autoneg, bool autoneg_wait_to_complete); +s32 ixgbe_check_link(struct ixgbe_hw *hw, ixgbe_link_speed *speed, + bool *link_up, bool link_up_wait_to_complete); +s32 ixgbe_get_link_capabilities(struct ixgbe_hw *hw, ixgbe_link_speed *speed, + bool *autoneg); +s32 ixgbe_led_on(struct ixgbe_hw *hw, u32 index); +s32 ixgbe_led_off(struct ixgbe_hw *hw, u32 index); +s32 ixgbe_blink_led_start(struct ixgbe_hw *hw, u32 index); +s32 ixgbe_blink_led_stop(struct ixgbe_hw *hw, u32 index); + +s32 ixgbe_init_eeprom_params(struct ixgbe_hw *hw); +s32 ixgbe_write_eeprom(struct ixgbe_hw *hw, u16 offset, u16 data); +s32 ixgbe_write_eeprom_buffer(struct ixgbe_hw *hw, u16 offset, + u16 words, u16 *data); +s32 ixgbe_read_eeprom(struct ixgbe_hw *hw, u16 offset, u16 *data); +s32 ixgbe_read_eeprom_buffer(struct ixgbe_hw *hw, u16 offset, + u16 words, u16 *data); + +s32 ixgbe_validate_eeprom_checksum(struct ixgbe_hw *hw, u16 *checksum_val); +s32 ixgbe_update_eeprom_checksum(struct ixgbe_hw *hw); + +s32 ixgbe_insert_mac_addr(struct ixgbe_hw *hw, u8 *addr, u32 vmdq); +s32 ixgbe_set_rar(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq, + u32 enable_addr); +s32 ixgbe_clear_rar(struct ixgbe_hw *hw, u32 index); +s32 ixgbe_set_vmdq(struct ixgbe_hw *hw, u32 rar, u32 vmdq); +s32 ixgbe_set_vmdq_san_mac(struct ixgbe_hw *hw, u32 vmdq); +s32 ixgbe_clear_vmdq(struct ixgbe_hw *hw, u32 rar, u32 vmdq); +s32 ixgbe_init_rx_addrs(struct ixgbe_hw *hw); +u32 ixgbe_get_num_rx_addrs(struct ixgbe_hw *hw); +s32 ixgbe_update_uc_addr_list(struct ixgbe_hw *hw, u8 *addr_list, + u32 addr_count, ixgbe_mc_addr_itr func); +s32 ixgbe_update_mc_addr_list(struct ixgbe_hw *hw, u8 *mc_addr_list, + u32 mc_addr_count, ixgbe_mc_addr_itr func, + bool clear); +void ixgbe_add_uc_addr(struct ixgbe_hw *hw, u8 *addr_list, u32 vmdq); +s32 ixgbe_enable_mc(struct ixgbe_hw *hw); +s32 ixgbe_disable_mc(struct ixgbe_hw *hw); +s32 ixgbe_clear_vfta(struct ixgbe_hw *hw); +s32 ixgbe_set_vfta(struct ixgbe_hw *hw, u32 vlan, + u32 vind, bool vlan_on); +s32 ixgbe_set_vlvf(struct ixgbe_hw *hw, u32 vlan, u32 vind, + bool vlan_on, bool *vfta_changed); +s32 ixgbe_fc_enable(struct ixgbe_hw *hw); +s32 ixgbe_set_fw_drv_ver(struct ixgbe_hw *hw, u8 maj, u8 min, u8 build, + u8 ver); +s32 ixgbe_get_thermal_sensor_data(struct ixgbe_hw *hw); +s32 ixgbe_init_thermal_sensor_thresh(struct ixgbe_hw *hw); +void ixgbe_set_mta(struct ixgbe_hw *hw, u8 *mc_addr); +s32 ixgbe_get_phy_firmware_version(struct ixgbe_hw *hw, + u16 *firmware_version); +s32 ixgbe_read_analog_reg8(struct ixgbe_hw *hw, u32 reg, u8 *val); +s32 ixgbe_write_analog_reg8(struct ixgbe_hw *hw, u32 reg, u8 val); +s32 ixgbe_init_uta_tables(struct ixgbe_hw *hw); +s32 ixgbe_read_i2c_eeprom(struct ixgbe_hw *hw, u8 byte_offset, u8 *eeprom_data); +u32 ixgbe_get_supported_physical_layer(struct ixgbe_hw *hw); +s32 ixgbe_enable_rx_dma(struct ixgbe_hw *hw, u32 regval); +s32 ixgbe_disable_sec_rx_path(struct ixgbe_hw *hw); +s32 ixgbe_enable_sec_rx_path(struct ixgbe_hw *hw); +s32 ixgbe_reinit_fdir_tables_82599(struct ixgbe_hw *hw); +s32 ixgbe_init_fdir_signature_82599(struct ixgbe_hw *hw, u32 fdirctrl); +s32 ixgbe_init_fdir_perfect_82599(struct ixgbe_hw *hw, u32 fdirctrl); +s32 ixgbe_fdir_add_signature_filter_82599(struct ixgbe_hw *hw, + union ixgbe_atr_hash_dword input, + union ixgbe_atr_hash_dword common, + u8 queue); +s32 ixgbe_fdir_set_input_mask_82599(struct ixgbe_hw *hw, + union ixgbe_atr_input *input_mask); +s32 ixgbe_fdir_write_perfect_filter_82599(struct ixgbe_hw *hw, + union ixgbe_atr_input *input, + u16 soft_id, u8 queue); +s32 ixgbe_fdir_erase_perfect_filter_82599(struct ixgbe_hw *hw, + union ixgbe_atr_input *input, + u16 soft_id); +s32 ixgbe_fdir_add_perfect_filter_82599(struct ixgbe_hw *hw, + union ixgbe_atr_input *input, + union ixgbe_atr_input *mask, + u16 soft_id, + u8 queue); +void ixgbe_atr_compute_perfect_hash_82599(union ixgbe_atr_input *input, + union ixgbe_atr_input *mask); +u32 ixgbe_atr_compute_sig_hash_82599(union ixgbe_atr_hash_dword input, + union ixgbe_atr_hash_dword common); +s32 ixgbe_read_i2c_byte(struct ixgbe_hw *hw, u8 byte_offset, u8 dev_addr, + u8 *data); +s32 ixgbe_write_i2c_byte(struct ixgbe_hw *hw, u8 byte_offset, u8 dev_addr, + u8 data); +s32 ixgbe_write_i2c_eeprom(struct ixgbe_hw *hw, u8 byte_offset, u8 eeprom_data); +s32 ixgbe_get_san_mac_addr(struct ixgbe_hw *hw, u8 *san_mac_addr); +s32 ixgbe_set_san_mac_addr(struct ixgbe_hw *hw, u8 *san_mac_addr); +s32 ixgbe_get_device_caps(struct ixgbe_hw *hw, u16 *device_caps); +s32 ixgbe_acquire_swfw_semaphore(struct ixgbe_hw *hw, u16 mask); +void ixgbe_release_swfw_semaphore(struct ixgbe_hw *hw, u16 mask); +s32 ixgbe_get_wwn_prefix(struct ixgbe_hw *hw, u16 *wwnn_prefix, + u16 *wwpn_prefix); +s32 ixgbe_get_fcoe_boot_status(struct ixgbe_hw *hw, u16 *bs); + +#endif /* _IXGBE_API_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c new file mode 100644 index 00000000..93659ca0 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.c @@ -0,0 +1,4082 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#include "ixgbe_common.h" +#include "ixgbe_phy.h" +#include "ixgbe_api.h" + +static s32 ixgbe_acquire_eeprom(struct ixgbe_hw *hw); +static s32 ixgbe_get_eeprom_semaphore(struct ixgbe_hw *hw); +static void ixgbe_release_eeprom_semaphore(struct ixgbe_hw *hw); +static s32 ixgbe_ready_eeprom(struct ixgbe_hw *hw); +static void ixgbe_standby_eeprom(struct ixgbe_hw *hw); +static void ixgbe_shift_out_eeprom_bits(struct ixgbe_hw *hw, u16 data, + u16 count); +static u16 ixgbe_shift_in_eeprom_bits(struct ixgbe_hw *hw, u16 count); +static void ixgbe_raise_eeprom_clk(struct ixgbe_hw *hw, u32 *eec); +static void ixgbe_lower_eeprom_clk(struct ixgbe_hw *hw, u32 *eec); +static void ixgbe_release_eeprom(struct ixgbe_hw *hw); + +static s32 ixgbe_mta_vector(struct ixgbe_hw *hw, u8 *mc_addr); +static s32 ixgbe_get_san_mac_addr_offset(struct ixgbe_hw *hw, + u16 *san_mac_offset); +static s32 ixgbe_read_eeprom_buffer_bit_bang(struct ixgbe_hw *hw, u16 offset, + u16 words, u16 *data); +static s32 ixgbe_write_eeprom_buffer_bit_bang(struct ixgbe_hw *hw, u16 offset, + u16 words, u16 *data); +static s32 ixgbe_detect_eeprom_page_size_generic(struct ixgbe_hw *hw, + u16 offset); + +/** + * ixgbe_init_ops_generic - Inits function ptrs + * @hw: pointer to the hardware structure + * + * Initialize the function pointers. + **/ +s32 ixgbe_init_ops_generic(struct ixgbe_hw *hw) +{ + struct ixgbe_eeprom_info *eeprom = &hw->eeprom; + struct ixgbe_mac_info *mac = &hw->mac; + u32 eec = IXGBE_READ_REG(hw, IXGBE_EEC); + + /* EEPROM */ + eeprom->ops.init_params = &ixgbe_init_eeprom_params_generic; + /* If EEPROM is valid (bit 8 = 1), use EERD otherwise use bit bang */ + if (eec & IXGBE_EEC_PRES) { + eeprom->ops.read = &ixgbe_read_eerd_generic; + eeprom->ops.read_buffer = &ixgbe_read_eerd_buffer_generic; + } else { + eeprom->ops.read = &ixgbe_read_eeprom_bit_bang_generic; + eeprom->ops.read_buffer = + &ixgbe_read_eeprom_buffer_bit_bang_generic; + } + eeprom->ops.write = &ixgbe_write_eeprom_generic; + eeprom->ops.write_buffer = &ixgbe_write_eeprom_buffer_bit_bang_generic; + eeprom->ops.validate_checksum = + &ixgbe_validate_eeprom_checksum_generic; + eeprom->ops.update_checksum = &ixgbe_update_eeprom_checksum_generic; + eeprom->ops.calc_checksum = &ixgbe_calc_eeprom_checksum_generic; + + /* MAC */ + mac->ops.init_hw = &ixgbe_init_hw_generic; + mac->ops.reset_hw = NULL; + mac->ops.start_hw = &ixgbe_start_hw_generic; + mac->ops.clear_hw_cntrs = &ixgbe_clear_hw_cntrs_generic; + mac->ops.get_media_type = NULL; + mac->ops.get_supported_physical_layer = NULL; + mac->ops.enable_rx_dma = &ixgbe_enable_rx_dma_generic; + mac->ops.get_mac_addr = &ixgbe_get_mac_addr_generic; + mac->ops.stop_adapter = &ixgbe_stop_adapter_generic; + mac->ops.get_bus_info = &ixgbe_get_bus_info_generic; + mac->ops.set_lan_id = &ixgbe_set_lan_id_multi_port_pcie; + mac->ops.acquire_swfw_sync = &ixgbe_acquire_swfw_sync; + mac->ops.release_swfw_sync = &ixgbe_release_swfw_sync; + + /* LEDs */ + mac->ops.led_on = &ixgbe_led_on_generic; + mac->ops.led_off = &ixgbe_led_off_generic; + mac->ops.blink_led_start = &ixgbe_blink_led_start_generic; + mac->ops.blink_led_stop = &ixgbe_blink_led_stop_generic; + + /* RAR, Multicast, VLAN */ + mac->ops.set_rar = &ixgbe_set_rar_generic; + mac->ops.clear_rar = &ixgbe_clear_rar_generic; + mac->ops.insert_mac_addr = NULL; + mac->ops.set_vmdq = NULL; + mac->ops.clear_vmdq = NULL; + mac->ops.init_rx_addrs = &ixgbe_init_rx_addrs_generic; + mac->ops.update_uc_addr_list = &ixgbe_update_uc_addr_list_generic; + mac->ops.update_mc_addr_list = &ixgbe_update_mc_addr_list_generic; + mac->ops.enable_mc = &ixgbe_enable_mc_generic; + mac->ops.disable_mc = &ixgbe_disable_mc_generic; + mac->ops.clear_vfta = NULL; + mac->ops.set_vfta = NULL; + mac->ops.set_vlvf = NULL; + mac->ops.init_uta_tables = NULL; + + /* Flow Control */ + mac->ops.fc_enable = &ixgbe_fc_enable_generic; + + /* Link */ + mac->ops.get_link_capabilities = NULL; + mac->ops.setup_link = NULL; + mac->ops.check_link = NULL; + + return 0; +} + +/** + * ixgbe_device_supports_autoneg_fc - Check if phy supports autoneg flow + * control + * @hw: pointer to hardware structure + * + * There are several phys that do not support autoneg flow control. This + * function check the device id to see if the associated phy supports + * autoneg flow control. + **/ +static s32 ixgbe_device_supports_autoneg_fc(struct ixgbe_hw *hw) +{ + + switch (hw->device_id) { + case IXGBE_DEV_ID_X540T: + return 0; + case IXGBE_DEV_ID_82599_T3_LOM: + return 0; + default: + return IXGBE_ERR_FC_NOT_SUPPORTED; + } +} + +/** + * ixgbe_setup_fc - Set up flow control + * @hw: pointer to hardware structure + * + * Called at init time to set up flow control. + **/ +static s32 ixgbe_setup_fc(struct ixgbe_hw *hw) +{ + s32 ret_val = 0; + u32 reg = 0, reg_bp = 0; + u16 reg_cu = 0; + + /* + * Validate the requested mode. Strict IEEE mode does not allow + * ixgbe_fc_rx_pause because it will cause us to fail at UNH. + */ + if (hw->fc.strict_ieee && hw->fc.requested_mode == ixgbe_fc_rx_pause) { + hw_dbg(hw, "ixgbe_fc_rx_pause not valid in strict IEEE mode\n"); + ret_val = IXGBE_ERR_INVALID_LINK_SETTINGS; + goto out; + } + + /* + * 10gig parts do not have a word in the EEPROM to determine the + * default flow control setting, so we explicitly set it to full. + */ + if (hw->fc.requested_mode == ixgbe_fc_default) + hw->fc.requested_mode = ixgbe_fc_full; + + /* + * Set up the 1G and 10G flow control advertisement registers so the + * HW will be able to do fc autoneg once the cable is plugged in. If + * we link at 10G, the 1G advertisement is harmless and vice versa. + */ + switch (hw->phy.media_type) { + case ixgbe_media_type_fiber: + case ixgbe_media_type_backplane: + reg = IXGBE_READ_REG(hw, IXGBE_PCS1GANA); + reg_bp = IXGBE_READ_REG(hw, IXGBE_AUTOC); + break; + case ixgbe_media_type_copper: + hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_ADVT, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, ®_cu); + break; + default: + break; + } + + /* + * The possible values of fc.requested_mode are: + * 0: Flow control is completely disabled + * 1: Rx flow control is enabled (we can receive pause frames, + * but not send pause frames). + * 2: Tx flow control is enabled (we can send pause frames but + * we do not support receiving pause frames). + * 3: Both Rx and Tx flow control (symmetric) are enabled. + * other: Invalid. + */ + switch (hw->fc.requested_mode) { + case ixgbe_fc_none: + /* Flow control completely disabled by software override. */ + reg &= ~(IXGBE_PCS1GANA_SYM_PAUSE | IXGBE_PCS1GANA_ASM_PAUSE); + if (hw->phy.media_type == ixgbe_media_type_backplane) + reg_bp &= ~(IXGBE_AUTOC_SYM_PAUSE | + IXGBE_AUTOC_ASM_PAUSE); + else if (hw->phy.media_type == ixgbe_media_type_copper) + reg_cu &= ~(IXGBE_TAF_SYM_PAUSE | IXGBE_TAF_ASM_PAUSE); + break; + case ixgbe_fc_tx_pause: + /* + * Tx Flow control is enabled, and Rx Flow control is + * disabled by software override. + */ + reg |= IXGBE_PCS1GANA_ASM_PAUSE; + reg &= ~IXGBE_PCS1GANA_SYM_PAUSE; + if (hw->phy.media_type == ixgbe_media_type_backplane) { + reg_bp |= IXGBE_AUTOC_ASM_PAUSE; + reg_bp &= ~IXGBE_AUTOC_SYM_PAUSE; + } else if (hw->phy.media_type == ixgbe_media_type_copper) { + reg_cu |= IXGBE_TAF_ASM_PAUSE; + reg_cu &= ~IXGBE_TAF_SYM_PAUSE; + } + break; + case ixgbe_fc_rx_pause: + /* + * Rx Flow control is enabled and Tx Flow control is + * disabled by software override. Since there really + * isn't a way to advertise that we are capable of RX + * Pause ONLY, we will advertise that we support both + * symmetric and asymmetric Rx PAUSE, as such we fall + * through to the fc_full statement. Later, we will + * disable the adapter's ability to send PAUSE frames. + */ + case ixgbe_fc_full: + /* Flow control (both Rx and Tx) is enabled by SW override. */ + reg |= IXGBE_PCS1GANA_SYM_PAUSE | IXGBE_PCS1GANA_ASM_PAUSE; + if (hw->phy.media_type == ixgbe_media_type_backplane) + reg_bp |= IXGBE_AUTOC_SYM_PAUSE | + IXGBE_AUTOC_ASM_PAUSE; + else if (hw->phy.media_type == ixgbe_media_type_copper) + reg_cu |= IXGBE_TAF_SYM_PAUSE | IXGBE_TAF_ASM_PAUSE; + break; + default: + hw_dbg(hw, "Flow control param set incorrectly\n"); + ret_val = IXGBE_ERR_CONFIG; + goto out; + break; + } + + if (hw->mac.type != ixgbe_mac_X540) { + /* + * Enable auto-negotiation between the MAC & PHY; + * the MAC will advertise clause 37 flow control. + */ + IXGBE_WRITE_REG(hw, IXGBE_PCS1GANA, reg); + reg = IXGBE_READ_REG(hw, IXGBE_PCS1GLCTL); + + /* Disable AN timeout */ + if (hw->fc.strict_ieee) + reg &= ~IXGBE_PCS1GLCTL_AN_1G_TIMEOUT_EN; + + IXGBE_WRITE_REG(hw, IXGBE_PCS1GLCTL, reg); + hw_dbg(hw, "Set up FC; PCS1GLCTL = 0x%08X\n", reg); + } + + /* + * AUTOC restart handles negotiation of 1G and 10G on backplane + * and copper. There is no need to set the PCS1GCTL register. + * + */ + if (hw->phy.media_type == ixgbe_media_type_backplane) { + reg_bp |= IXGBE_AUTOC_AN_RESTART; + IXGBE_WRITE_REG(hw, IXGBE_AUTOC, reg_bp); + } else if ((hw->phy.media_type == ixgbe_media_type_copper) && + (ixgbe_device_supports_autoneg_fc(hw) == 0)) { + hw->phy.ops.write_reg(hw, IXGBE_MDIO_AUTO_NEG_ADVT, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, reg_cu); + } + + hw_dbg(hw, "Set up FC; IXGBE_AUTOC = 0x%08X\n", reg); +out: + return ret_val; +} + +/** + * ixgbe_start_hw_generic - Prepare hardware for Tx/Rx + * @hw: pointer to hardware structure + * + * Starts the hardware by filling the bus info structure and media type, clears + * all on chip counters, initializes receive address registers, multicast + * table, VLAN filter table, calls routine to set up link and flow control + * settings, and leaves transmit and receive units disabled and uninitialized + **/ +s32 ixgbe_start_hw_generic(struct ixgbe_hw *hw) +{ + s32 ret_val; + u32 ctrl_ext; + + /* Set the media type */ + hw->phy.media_type = hw->mac.ops.get_media_type(hw); + + /* PHY ops initialization must be done in reset_hw() */ + + /* Clear the VLAN filter table */ + hw->mac.ops.clear_vfta(hw); + + /* Clear statistics registers */ + hw->mac.ops.clear_hw_cntrs(hw); + + /* Set No Snoop Disable */ + ctrl_ext = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT); + ctrl_ext |= IXGBE_CTRL_EXT_NS_DIS; + IXGBE_WRITE_REG(hw, IXGBE_CTRL_EXT, ctrl_ext); + IXGBE_WRITE_FLUSH(hw); + + /* Setup flow control */ + ret_val = ixgbe_setup_fc(hw); + if (ret_val != 0) + goto out; + + /* Clear adapter stopped flag */ + hw->adapter_stopped = false; + +out: + return ret_val; +} + +/** + * ixgbe_start_hw_gen2 - Init sequence for common device family + * @hw: pointer to hw structure + * + * Performs the init sequence common to the second generation + * of 10 GbE devices. + * Devices in the second generation: + * 82599 + * X540 + **/ +s32 ixgbe_start_hw_gen2(struct ixgbe_hw *hw) +{ + u32 i; + u32 regval; + + /* Clear the rate limiters */ + for (i = 0; i < hw->mac.max_tx_queues; i++) { + IXGBE_WRITE_REG(hw, IXGBE_RTTDQSEL, i); + IXGBE_WRITE_REG(hw, IXGBE_RTTBCNRC, 0); + } + IXGBE_WRITE_FLUSH(hw); + + /* Disable relaxed ordering */ + for (i = 0; i < hw->mac.max_tx_queues; i++) { + regval = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL_82599(i)); + regval &= ~IXGBE_DCA_TXCTRL_DESC_WRO_EN; + IXGBE_WRITE_REG(hw, IXGBE_DCA_TXCTRL_82599(i), regval); + } + + for (i = 0; i < hw->mac.max_rx_queues; i++) { + regval = IXGBE_READ_REG(hw, IXGBE_DCA_RXCTRL(i)); + regval &= ~(IXGBE_DCA_RXCTRL_DATA_WRO_EN | + IXGBE_DCA_RXCTRL_HEAD_WRO_EN); + IXGBE_WRITE_REG(hw, IXGBE_DCA_RXCTRL(i), regval); + } + + return 0; +} + +/** + * ixgbe_init_hw_generic - Generic hardware initialization + * @hw: pointer to hardware structure + * + * Initialize the hardware by resetting the hardware, filling the bus info + * structure and media type, clears all on chip counters, initializes receive + * address registers, multicast table, VLAN filter table, calls routine to set + * up link and flow control settings, and leaves transmit and receive units + * disabled and uninitialized + **/ +s32 ixgbe_init_hw_generic(struct ixgbe_hw *hw) +{ + s32 status; + + /* Reset the hardware */ + status = hw->mac.ops.reset_hw(hw); + + if (status == 0) { + /* Start the HW */ + status = hw->mac.ops.start_hw(hw); + } + + return status; +} + +/** + * ixgbe_clear_hw_cntrs_generic - Generic clear hardware counters + * @hw: pointer to hardware structure + * + * Clears all hardware statistics counters by reading them from the hardware + * Statistics counters are clear on read. + **/ +s32 ixgbe_clear_hw_cntrs_generic(struct ixgbe_hw *hw) +{ + u16 i = 0; + + IXGBE_READ_REG(hw, IXGBE_CRCERRS); + IXGBE_READ_REG(hw, IXGBE_ILLERRC); + IXGBE_READ_REG(hw, IXGBE_ERRBC); + IXGBE_READ_REG(hw, IXGBE_MSPDC); + for (i = 0; i < 8; i++) + IXGBE_READ_REG(hw, IXGBE_MPC(i)); + + IXGBE_READ_REG(hw, IXGBE_MLFC); + IXGBE_READ_REG(hw, IXGBE_MRFC); + IXGBE_READ_REG(hw, IXGBE_RLEC); + IXGBE_READ_REG(hw, IXGBE_LXONTXC); + IXGBE_READ_REG(hw, IXGBE_LXOFFTXC); + if (hw->mac.type >= ixgbe_mac_82599EB) { + IXGBE_READ_REG(hw, IXGBE_LXONRXCNT); + IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT); + } else { + IXGBE_READ_REG(hw, IXGBE_LXONRXC); + IXGBE_READ_REG(hw, IXGBE_LXOFFRXC); + } + + for (i = 0; i < 8; i++) { + IXGBE_READ_REG(hw, IXGBE_PXONTXC(i)); + IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i)); + if (hw->mac.type >= ixgbe_mac_82599EB) { + IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i)); + IXGBE_READ_REG(hw, IXGBE_PXOFFRXCNT(i)); + } else { + IXGBE_READ_REG(hw, IXGBE_PXONRXC(i)); + IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i)); + } + } + if (hw->mac.type >= ixgbe_mac_82599EB) + for (i = 0; i < 8; i++) + IXGBE_READ_REG(hw, IXGBE_PXON2OFFCNT(i)); + IXGBE_READ_REG(hw, IXGBE_PRC64); + IXGBE_READ_REG(hw, IXGBE_PRC127); + IXGBE_READ_REG(hw, IXGBE_PRC255); + IXGBE_READ_REG(hw, IXGBE_PRC511); + IXGBE_READ_REG(hw, IXGBE_PRC1023); + IXGBE_READ_REG(hw, IXGBE_PRC1522); + IXGBE_READ_REG(hw, IXGBE_GPRC); + IXGBE_READ_REG(hw, IXGBE_BPRC); + IXGBE_READ_REG(hw, IXGBE_MPRC); + IXGBE_READ_REG(hw, IXGBE_GPTC); + IXGBE_READ_REG(hw, IXGBE_GORCL); + IXGBE_READ_REG(hw, IXGBE_GORCH); + IXGBE_READ_REG(hw, IXGBE_GOTCL); + IXGBE_READ_REG(hw, IXGBE_GOTCH); + if (hw->mac.type == ixgbe_mac_82598EB) + for (i = 0; i < 8; i++) + IXGBE_READ_REG(hw, IXGBE_RNBC(i)); + IXGBE_READ_REG(hw, IXGBE_RUC); + IXGBE_READ_REG(hw, IXGBE_RFC); + IXGBE_READ_REG(hw, IXGBE_ROC); + IXGBE_READ_REG(hw, IXGBE_RJC); + IXGBE_READ_REG(hw, IXGBE_MNGPRC); + IXGBE_READ_REG(hw, IXGBE_MNGPDC); + IXGBE_READ_REG(hw, IXGBE_MNGPTC); + IXGBE_READ_REG(hw, IXGBE_TORL); + IXGBE_READ_REG(hw, IXGBE_TORH); + IXGBE_READ_REG(hw, IXGBE_TPR); + IXGBE_READ_REG(hw, IXGBE_TPT); + IXGBE_READ_REG(hw, IXGBE_PTC64); + IXGBE_READ_REG(hw, IXGBE_PTC127); + IXGBE_READ_REG(hw, IXGBE_PTC255); + IXGBE_READ_REG(hw, IXGBE_PTC511); + IXGBE_READ_REG(hw, IXGBE_PTC1023); + IXGBE_READ_REG(hw, IXGBE_PTC1522); + IXGBE_READ_REG(hw, IXGBE_MPTC); + IXGBE_READ_REG(hw, IXGBE_BPTC); + for (i = 0; i < 16; i++) { + IXGBE_READ_REG(hw, IXGBE_QPRC(i)); + IXGBE_READ_REG(hw, IXGBE_QPTC(i)); + if (hw->mac.type >= ixgbe_mac_82599EB) { + IXGBE_READ_REG(hw, IXGBE_QBRC_L(i)); + IXGBE_READ_REG(hw, IXGBE_QBRC_H(i)); + IXGBE_READ_REG(hw, IXGBE_QBTC_L(i)); + IXGBE_READ_REG(hw, IXGBE_QBTC_H(i)); + IXGBE_READ_REG(hw, IXGBE_QPRDC(i)); + } else { + IXGBE_READ_REG(hw, IXGBE_QBRC(i)); + IXGBE_READ_REG(hw, IXGBE_QBTC(i)); + } + } + + if (hw->mac.type == ixgbe_mac_X540) { + if (hw->phy.id == 0) + ixgbe_identify_phy(hw); + hw->phy.ops.read_reg(hw, IXGBE_PCRC8ECL, + IXGBE_MDIO_PCS_DEV_TYPE, &i); + hw->phy.ops.read_reg(hw, IXGBE_PCRC8ECH, + IXGBE_MDIO_PCS_DEV_TYPE, &i); + hw->phy.ops.read_reg(hw, IXGBE_LDPCECL, + IXGBE_MDIO_PCS_DEV_TYPE, &i); + hw->phy.ops.read_reg(hw, IXGBE_LDPCECH, + IXGBE_MDIO_PCS_DEV_TYPE, &i); + } + + return 0; +} + +/** + * ixgbe_read_pba_string_generic - Reads part number string from EEPROM + * @hw: pointer to hardware structure + * @pba_num: stores the part number string from the EEPROM + * @pba_num_size: part number string buffer length + * + * Reads the part number string from the EEPROM. + **/ +s32 ixgbe_read_pba_string_generic(struct ixgbe_hw *hw, u8 *pba_num, + u32 pba_num_size) +{ + s32 ret_val; + u16 data; + u16 pba_ptr; + u16 offset; + u16 length; + + if (pba_num == NULL) { + hw_dbg(hw, "PBA string buffer was null\n"); + return IXGBE_ERR_INVALID_ARGUMENT; + } + + ret_val = hw->eeprom.ops.read(hw, IXGBE_PBANUM0_PTR, &data); + if (ret_val) { + hw_dbg(hw, "NVM Read Error\n"); + return ret_val; + } + + ret_val = hw->eeprom.ops.read(hw, IXGBE_PBANUM1_PTR, &pba_ptr); + if (ret_val) { + hw_dbg(hw, "NVM Read Error\n"); + return ret_val; + } + + /* + * if data is not ptr guard the PBA must be in legacy format which + * means pba_ptr is actually our second data word for the PBA number + * and we can decode it into an ascii string + */ + if (data != IXGBE_PBANUM_PTR_GUARD) { + hw_dbg(hw, "NVM PBA number is not stored as string\n"); + + /* we will need 11 characters to store the PBA */ + if (pba_num_size < 11) { + hw_dbg(hw, "PBA string buffer too small\n"); + return IXGBE_ERR_NO_SPACE; + } + + /* extract hex string from data and pba_ptr */ + pba_num[0] = (data >> 12) & 0xF; + pba_num[1] = (data >> 8) & 0xF; + pba_num[2] = (data >> 4) & 0xF; + pba_num[3] = data & 0xF; + pba_num[4] = (pba_ptr >> 12) & 0xF; + pba_num[5] = (pba_ptr >> 8) & 0xF; + pba_num[6] = '-'; + pba_num[7] = 0; + pba_num[8] = (pba_ptr >> 4) & 0xF; + pba_num[9] = pba_ptr & 0xF; + + /* put a null character on the end of our string */ + pba_num[10] = '\0'; + + /* switch all the data but the '-' to hex char */ + for (offset = 0; offset < 10; offset++) { + if (pba_num[offset] < 0xA) + pba_num[offset] += '0'; + else if (pba_num[offset] < 0x10) + pba_num[offset] += 'A' - 0xA; + } + + return 0; + } + + ret_val = hw->eeprom.ops.read(hw, pba_ptr, &length); + if (ret_val) { + hw_dbg(hw, "NVM Read Error\n"); + return ret_val; + } + + if (length == 0xFFFF || length == 0) { + hw_dbg(hw, "NVM PBA number section invalid length\n"); + return IXGBE_ERR_PBA_SECTION; + } + + /* check if pba_num buffer is big enough */ + if (pba_num_size < (((u32)length * 2) - 1)) { + hw_dbg(hw, "PBA string buffer too small\n"); + return IXGBE_ERR_NO_SPACE; + } + + /* trim pba length from start of string */ + pba_ptr++; + length--; + + for (offset = 0; offset < length; offset++) { + ret_val = hw->eeprom.ops.read(hw, pba_ptr + offset, &data); + if (ret_val) { + hw_dbg(hw, "NVM Read Error\n"); + return ret_val; + } + pba_num[offset * 2] = (u8)(data >> 8); + pba_num[(offset * 2) + 1] = (u8)(data & 0xFF); + } + pba_num[offset * 2] = '\0'; + + return 0; +} + +/** + * ixgbe_get_mac_addr_generic - Generic get MAC address + * @hw: pointer to hardware structure + * @mac_addr: Adapter MAC address + * + * Reads the adapter's MAC address from first Receive Address Register (RAR0) + * A reset of the adapter must be performed prior to calling this function + * in order for the MAC address to have been loaded from the EEPROM into RAR0 + **/ +s32 ixgbe_get_mac_addr_generic(struct ixgbe_hw *hw, u8 *mac_addr) +{ + u32 rar_high; + u32 rar_low; + u16 i; + + rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(0)); + rar_low = IXGBE_READ_REG(hw, IXGBE_RAL(0)); + + for (i = 0; i < 4; i++) + mac_addr[i] = (u8)(rar_low >> (i*8)); + + for (i = 0; i < 2; i++) + mac_addr[i+4] = (u8)(rar_high >> (i*8)); + + return 0; +} + +/** + * ixgbe_get_bus_info_generic - Generic set PCI bus info + * @hw: pointer to hardware structure + * + * Sets the PCI bus info (speed, width, type) within the ixgbe_hw structure + **/ +s32 ixgbe_get_bus_info_generic(struct ixgbe_hw *hw) +{ + struct ixgbe_mac_info *mac = &hw->mac; + u16 link_status; + + hw->bus.type = ixgbe_bus_type_pci_express; + + /* Get the negotiated link width and speed from PCI config space */ + link_status = IXGBE_READ_PCIE_WORD(hw, IXGBE_PCI_LINK_STATUS); + + switch (link_status & IXGBE_PCI_LINK_WIDTH) { + case IXGBE_PCI_LINK_WIDTH_1: + hw->bus.width = ixgbe_bus_width_pcie_x1; + break; + case IXGBE_PCI_LINK_WIDTH_2: + hw->bus.width = ixgbe_bus_width_pcie_x2; + break; + case IXGBE_PCI_LINK_WIDTH_4: + hw->bus.width = ixgbe_bus_width_pcie_x4; + break; + case IXGBE_PCI_LINK_WIDTH_8: + hw->bus.width = ixgbe_bus_width_pcie_x8; + break; + default: + hw->bus.width = ixgbe_bus_width_unknown; + break; + } + + switch (link_status & IXGBE_PCI_LINK_SPEED) { + case IXGBE_PCI_LINK_SPEED_2500: + hw->bus.speed = ixgbe_bus_speed_2500; + break; + case IXGBE_PCI_LINK_SPEED_5000: + hw->bus.speed = ixgbe_bus_speed_5000; + break; + case IXGBE_PCI_LINK_SPEED_8000: + hw->bus.speed = ixgbe_bus_speed_8000; + break; + default: + hw->bus.speed = ixgbe_bus_speed_unknown; + break; + } + + mac->ops.set_lan_id(hw); + + return 0; +} + +/** + * ixgbe_set_lan_id_multi_port_pcie - Set LAN id for PCIe multiple port devices + * @hw: pointer to the HW structure + * + * Determines the LAN function id by reading memory-mapped registers + * and swaps the port value if requested. + **/ +void ixgbe_set_lan_id_multi_port_pcie(struct ixgbe_hw *hw) +{ + struct ixgbe_bus_info *bus = &hw->bus; + u32 reg; + + reg = IXGBE_READ_REG(hw, IXGBE_STATUS); + bus->func = (reg & IXGBE_STATUS_LAN_ID) >> IXGBE_STATUS_LAN_ID_SHIFT; + bus->lan_id = bus->func; + + /* check for a port swap */ + reg = IXGBE_READ_REG(hw, IXGBE_FACTPS); + if (reg & IXGBE_FACTPS_LFS) + bus->func ^= 0x1; +} + +/** + * ixgbe_stop_adapter_generic - Generic stop Tx/Rx units + * @hw: pointer to hardware structure + * + * Sets the adapter_stopped flag within ixgbe_hw struct. Clears interrupts, + * disables transmit and receive units. The adapter_stopped flag is used by + * the shared code and drivers to determine if the adapter is in a stopped + * state and should not touch the hardware. + **/ +s32 ixgbe_stop_adapter_generic(struct ixgbe_hw *hw) +{ + u32 reg_val; + u16 i; + + /* + * Set the adapter_stopped flag so other driver functions stop touching + * the hardware + */ + hw->adapter_stopped = true; + + /* Disable the receive unit */ + IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, 0); + + /* Clear interrupt mask to stop interrupts from being generated */ + IXGBE_WRITE_REG(hw, IXGBE_EIMC, IXGBE_IRQ_CLEAR_MASK); + + /* Clear any pending interrupts, flush previous writes */ + IXGBE_READ_REG(hw, IXGBE_EICR); + + /* Disable the transmit unit. Each queue must be disabled. */ + for (i = 0; i < hw->mac.max_tx_queues; i++) + IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(i), IXGBE_TXDCTL_SWFLSH); + + /* Disable the receive unit by stopping each queue */ + for (i = 0; i < hw->mac.max_rx_queues; i++) { + reg_val = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)); + reg_val &= ~IXGBE_RXDCTL_ENABLE; + reg_val |= IXGBE_RXDCTL_SWFLSH; + IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(i), reg_val); + } + + /* flush all queues disables */ + IXGBE_WRITE_FLUSH(hw); + msleep(2); + + /* + * Prevent the PCI-E bus from from hanging by disabling PCI-E master + * access and verify no pending requests + */ + return ixgbe_disable_pcie_master(hw); +} + +/** + * ixgbe_led_on_generic - Turns on the software controllable LEDs. + * @hw: pointer to hardware structure + * @index: led number to turn on + **/ +s32 ixgbe_led_on_generic(struct ixgbe_hw *hw, u32 index) +{ + u32 led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL); + + /* To turn on the LED, set mode to ON. */ + led_reg &= ~IXGBE_LED_MODE_MASK(index); + led_reg |= IXGBE_LED_ON << IXGBE_LED_MODE_SHIFT(index); + IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, led_reg); + IXGBE_WRITE_FLUSH(hw); + + return 0; +} + +/** + * ixgbe_led_off_generic - Turns off the software controllable LEDs. + * @hw: pointer to hardware structure + * @index: led number to turn off + **/ +s32 ixgbe_led_off_generic(struct ixgbe_hw *hw, u32 index) +{ + u32 led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL); + + /* To turn off the LED, set mode to OFF. */ + led_reg &= ~IXGBE_LED_MODE_MASK(index); + led_reg |= IXGBE_LED_OFF << IXGBE_LED_MODE_SHIFT(index); + IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, led_reg); + IXGBE_WRITE_FLUSH(hw); + + return 0; +} + +/** + * ixgbe_init_eeprom_params_generic - Initialize EEPROM params + * @hw: pointer to hardware structure + * + * Initializes the EEPROM parameters ixgbe_eeprom_info within the + * ixgbe_hw struct in order to set up EEPROM access. + **/ +s32 ixgbe_init_eeprom_params_generic(struct ixgbe_hw *hw) +{ + struct ixgbe_eeprom_info *eeprom = &hw->eeprom; + u32 eec; + u16 eeprom_size; + + if (eeprom->type == ixgbe_eeprom_uninitialized) { + eeprom->type = ixgbe_eeprom_none; + /* Set default semaphore delay to 10ms which is a well + * tested value */ + eeprom->semaphore_delay = 10; + /* Clear EEPROM page size, it will be initialized as needed */ + eeprom->word_page_size = 0; + + /* + * Check for EEPROM present first. + * If not present leave as none + */ + eec = IXGBE_READ_REG(hw, IXGBE_EEC); + if (eec & IXGBE_EEC_PRES) { + eeprom->type = ixgbe_eeprom_spi; + + /* + * SPI EEPROM is assumed here. This code would need to + * change if a future EEPROM is not SPI. + */ + eeprom_size = (u16)((eec & IXGBE_EEC_SIZE) >> + IXGBE_EEC_SIZE_SHIFT); + eeprom->word_size = 1 << (eeprom_size + + IXGBE_EEPROM_WORD_SIZE_SHIFT); + } + + if (eec & IXGBE_EEC_ADDR_SIZE) + eeprom->address_bits = 16; + else + eeprom->address_bits = 8; + hw_dbg(hw, "Eeprom params: type = %d, size = %d, address bits: " + "%d\n", eeprom->type, eeprom->word_size, + eeprom->address_bits); + } + + return 0; +} + +/** + * ixgbe_write_eeprom_buffer_bit_bang_generic - Write EEPROM using bit-bang + * @hw: pointer to hardware structure + * @offset: offset within the EEPROM to write + * @words: number of word(s) + * @data: 16 bit word(s) to write to EEPROM + * + * Reads 16 bit word(s) from EEPROM through bit-bang method + **/ +s32 ixgbe_write_eeprom_buffer_bit_bang_generic(struct ixgbe_hw *hw, u16 offset, + u16 words, u16 *data) +{ + s32 status = 0; + u16 i, count; + + hw->eeprom.ops.init_params(hw); + + if (words == 0) { + status = IXGBE_ERR_INVALID_ARGUMENT; + goto out; + } + + if (offset + words > hw->eeprom.word_size) { + status = IXGBE_ERR_EEPROM; + goto out; + } + + /* + * The EEPROM page size cannot be queried from the chip. We do lazy + * initialization. It is worth to do that when we write large buffer. + */ + if ((hw->eeprom.word_page_size == 0) && + (words > IXGBE_EEPROM_PAGE_SIZE_MAX)) + ixgbe_detect_eeprom_page_size_generic(hw, offset); + + /* + * We cannot hold synchronization semaphores for too long + * to avoid other entity starvation. However it is more efficient + * to read in bursts than synchronizing access for each word. + */ + for (i = 0; i < words; i += IXGBE_EEPROM_RD_BUFFER_MAX_COUNT) { + count = (words - i) / IXGBE_EEPROM_RD_BUFFER_MAX_COUNT > 0 ? + IXGBE_EEPROM_RD_BUFFER_MAX_COUNT : (words - i); + status = ixgbe_write_eeprom_buffer_bit_bang(hw, offset + i, + count, &data[i]); + + if (status != 0) + break; + } + +out: + return status; +} + +/** + * ixgbe_write_eeprom_buffer_bit_bang - Writes 16 bit word(s) to EEPROM + * @hw: pointer to hardware structure + * @offset: offset within the EEPROM to be written to + * @words: number of word(s) + * @data: 16 bit word(s) to be written to the EEPROM + * + * If ixgbe_eeprom_update_checksum is not called after this function, the + * EEPROM will most likely contain an invalid checksum. + **/ +static s32 ixgbe_write_eeprom_buffer_bit_bang(struct ixgbe_hw *hw, u16 offset, + u16 words, u16 *data) +{ + s32 status; + u16 word; + u16 page_size; + u16 i; + u8 write_opcode = IXGBE_EEPROM_WRITE_OPCODE_SPI; + + /* Prepare the EEPROM for writing */ + status = ixgbe_acquire_eeprom(hw); + + if (status == 0) { + if (ixgbe_ready_eeprom(hw) != 0) { + ixgbe_release_eeprom(hw); + status = IXGBE_ERR_EEPROM; + } + } + + if (status == 0) { + for (i = 0; i < words; i++) { + ixgbe_standby_eeprom(hw); + + /* Send the WRITE ENABLE command (8 bit opcode ) */ + ixgbe_shift_out_eeprom_bits(hw, + IXGBE_EEPROM_WREN_OPCODE_SPI, + IXGBE_EEPROM_OPCODE_BITS); + + ixgbe_standby_eeprom(hw); + + /* + * Some SPI eeproms use the 8th address bit embedded + * in the opcode + */ + if ((hw->eeprom.address_bits == 8) && + ((offset + i) >= 128)) + write_opcode |= IXGBE_EEPROM_A8_OPCODE_SPI; + + /* Send the Write command (8-bit opcode + addr) */ + ixgbe_shift_out_eeprom_bits(hw, write_opcode, + IXGBE_EEPROM_OPCODE_BITS); + ixgbe_shift_out_eeprom_bits(hw, (u16)((offset + i) * 2), + hw->eeprom.address_bits); + + page_size = hw->eeprom.word_page_size; + + /* Send the data in burst via SPI*/ + do { + word = data[i]; + word = (word >> 8) | (word << 8); + ixgbe_shift_out_eeprom_bits(hw, word, 16); + + if (page_size == 0) + break; + + /* do not wrap around page */ + if (((offset + i) & (page_size - 1)) == + (page_size - 1)) + break; + } while (++i < words); + + ixgbe_standby_eeprom(hw); + msleep(10); + } + /* Done with writing - release the EEPROM */ + ixgbe_release_eeprom(hw); + } + + return status; +} + +/** + * ixgbe_write_eeprom_generic - Writes 16 bit value to EEPROM + * @hw: pointer to hardware structure + * @offset: offset within the EEPROM to be written to + * @data: 16 bit word to be written to the EEPROM + * + * If ixgbe_eeprom_update_checksum is not called after this function, the + * EEPROM will most likely contain an invalid checksum. + **/ +s32 ixgbe_write_eeprom_generic(struct ixgbe_hw *hw, u16 offset, u16 data) +{ + s32 status; + + hw->eeprom.ops.init_params(hw); + + if (offset >= hw->eeprom.word_size) { + status = IXGBE_ERR_EEPROM; + goto out; + } + + status = ixgbe_write_eeprom_buffer_bit_bang(hw, offset, 1, &data); + +out: + return status; +} + +/** + * ixgbe_read_eeprom_buffer_bit_bang_generic - Read EEPROM using bit-bang + * @hw: pointer to hardware structure + * @offset: offset within the EEPROM to be read + * @data: read 16 bit words(s) from EEPROM + * @words: number of word(s) + * + * Reads 16 bit word(s) from EEPROM through bit-bang method + **/ +s32 ixgbe_read_eeprom_buffer_bit_bang_generic(struct ixgbe_hw *hw, u16 offset, + u16 words, u16 *data) +{ + s32 status = 0; + u16 i, count; + + hw->eeprom.ops.init_params(hw); + + if (words == 0) { + status = IXGBE_ERR_INVALID_ARGUMENT; + goto out; + } + + if (offset + words > hw->eeprom.word_size) { + status = IXGBE_ERR_EEPROM; + goto out; + } + + /* + * We cannot hold synchronization semaphores for too long + * to avoid other entity starvation. However it is more efficient + * to read in bursts than synchronizing access for each word. + */ + for (i = 0; i < words; i += IXGBE_EEPROM_RD_BUFFER_MAX_COUNT) { + count = (words - i) / IXGBE_EEPROM_RD_BUFFER_MAX_COUNT > 0 ? + IXGBE_EEPROM_RD_BUFFER_MAX_COUNT : (words - i); + + status = ixgbe_read_eeprom_buffer_bit_bang(hw, offset + i, + count, &data[i]); + + if (status != 0) + break; + } + +out: + return status; +} + +/** + * ixgbe_read_eeprom_buffer_bit_bang - Read EEPROM using bit-bang + * @hw: pointer to hardware structure + * @offset: offset within the EEPROM to be read + * @words: number of word(s) + * @data: read 16 bit word(s) from EEPROM + * + * Reads 16 bit word(s) from EEPROM through bit-bang method + **/ +static s32 ixgbe_read_eeprom_buffer_bit_bang(struct ixgbe_hw *hw, u16 offset, + u16 words, u16 *data) +{ + s32 status; + u16 word_in; + u8 read_opcode = IXGBE_EEPROM_READ_OPCODE_SPI; + u16 i; + + /* Prepare the EEPROM for reading */ + status = ixgbe_acquire_eeprom(hw); + + if (status == 0) { + if (ixgbe_ready_eeprom(hw) != 0) { + ixgbe_release_eeprom(hw); + status = IXGBE_ERR_EEPROM; + } + } + + if (status == 0) { + for (i = 0; i < words; i++) { + ixgbe_standby_eeprom(hw); + /* + * Some SPI eeproms use the 8th address bit embedded + * in the opcode + */ + if ((hw->eeprom.address_bits == 8) && + ((offset + i) >= 128)) + read_opcode |= IXGBE_EEPROM_A8_OPCODE_SPI; + + /* Send the READ command (opcode + addr) */ + ixgbe_shift_out_eeprom_bits(hw, read_opcode, + IXGBE_EEPROM_OPCODE_BITS); + ixgbe_shift_out_eeprom_bits(hw, (u16)((offset + i) * 2), + hw->eeprom.address_bits); + + /* Read the data. */ + word_in = ixgbe_shift_in_eeprom_bits(hw, 16); + data[i] = (word_in >> 8) | (word_in << 8); + } + + /* End this read operation */ + ixgbe_release_eeprom(hw); + } + + return status; +} + +/** + * ixgbe_read_eeprom_bit_bang_generic - Read EEPROM word using bit-bang + * @hw: pointer to hardware structure + * @offset: offset within the EEPROM to be read + * @data: read 16 bit value from EEPROM + * + * Reads 16 bit value from EEPROM through bit-bang method + **/ +s32 ixgbe_read_eeprom_bit_bang_generic(struct ixgbe_hw *hw, u16 offset, + u16 *data) +{ + s32 status; + + hw->eeprom.ops.init_params(hw); + + if (offset >= hw->eeprom.word_size) { + status = IXGBE_ERR_EEPROM; + goto out; + } + + status = ixgbe_read_eeprom_buffer_bit_bang(hw, offset, 1, data); + +out: + return status; +} + +/** + * ixgbe_read_eerd_buffer_generic - Read EEPROM word(s) using EERD + * @hw: pointer to hardware structure + * @offset: offset of word in the EEPROM to read + * @words: number of word(s) + * @data: 16 bit word(s) from the EEPROM + * + * Reads a 16 bit word(s) from the EEPROM using the EERD register. + **/ +s32 ixgbe_read_eerd_buffer_generic(struct ixgbe_hw *hw, u16 offset, + u16 words, u16 *data) +{ + u32 eerd; + s32 status = 0; + u32 i; + + hw->eeprom.ops.init_params(hw); + + if (words == 0) { + status = IXGBE_ERR_INVALID_ARGUMENT; + goto out; + } + + if (offset >= hw->eeprom.word_size) { + status = IXGBE_ERR_EEPROM; + goto out; + } + + for (i = 0; i < words; i++) { + eerd = ((offset + i) << IXGBE_EEPROM_RW_ADDR_SHIFT) + + IXGBE_EEPROM_RW_REG_START; + + IXGBE_WRITE_REG(hw, IXGBE_EERD, eerd); + status = ixgbe_poll_eerd_eewr_done(hw, IXGBE_NVM_POLL_READ); + + if (status == 0) { + data[i] = (IXGBE_READ_REG(hw, IXGBE_EERD) >> + IXGBE_EEPROM_RW_REG_DATA); + } else { + hw_dbg(hw, "Eeprom read timed out\n"); + goto out; + } + } +out: + return status; +} + +/** + * ixgbe_detect_eeprom_page_size_generic - Detect EEPROM page size + * @hw: pointer to hardware structure + * @offset: offset within the EEPROM to be used as a scratch pad + * + * Discover EEPROM page size by writing marching data at given offset. + * This function is called only when we are writing a new large buffer + * at given offset so the data would be overwritten anyway. + **/ +static s32 ixgbe_detect_eeprom_page_size_generic(struct ixgbe_hw *hw, + u16 offset) +{ + u16 data[IXGBE_EEPROM_PAGE_SIZE_MAX]; + s32 status = 0; + u16 i; + + for (i = 0; i < IXGBE_EEPROM_PAGE_SIZE_MAX; i++) + data[i] = i; + + hw->eeprom.word_page_size = IXGBE_EEPROM_PAGE_SIZE_MAX; + status = ixgbe_write_eeprom_buffer_bit_bang(hw, offset, + IXGBE_EEPROM_PAGE_SIZE_MAX, data); + hw->eeprom.word_page_size = 0; + if (status != 0) + goto out; + + status = ixgbe_read_eeprom_buffer_bit_bang(hw, offset, 1, data); + if (status != 0) + goto out; + + /* + * When writing in burst more than the actual page size + * EEPROM address wraps around current page. + */ + hw->eeprom.word_page_size = IXGBE_EEPROM_PAGE_SIZE_MAX - data[0]; + + hw_dbg(hw, "Detected EEPROM page size = %d words.", + hw->eeprom.word_page_size); +out: + return status; +} + +/** + * ixgbe_read_eerd_generic - Read EEPROM word using EERD + * @hw: pointer to hardware structure + * @offset: offset of word in the EEPROM to read + * @data: word read from the EEPROM + * + * Reads a 16 bit word from the EEPROM using the EERD register. + **/ +s32 ixgbe_read_eerd_generic(struct ixgbe_hw *hw, u16 offset, u16 *data) +{ + return ixgbe_read_eerd_buffer_generic(hw, offset, 1, data); +} + +/** + * ixgbe_write_eewr_buffer_generic - Write EEPROM word(s) using EEWR + * @hw: pointer to hardware structure + * @offset: offset of word in the EEPROM to write + * @words: number of word(s) + * @data: word(s) write to the EEPROM + * + * Write a 16 bit word(s) to the EEPROM using the EEWR register. + **/ +s32 ixgbe_write_eewr_buffer_generic(struct ixgbe_hw *hw, u16 offset, + u16 words, u16 *data) +{ + u32 eewr; + s32 status = 0; + u16 i; + + hw->eeprom.ops.init_params(hw); + + if (words == 0) { + status = IXGBE_ERR_INVALID_ARGUMENT; + goto out; + } + + if (offset >= hw->eeprom.word_size) { + status = IXGBE_ERR_EEPROM; + goto out; + } + + for (i = 0; i < words; i++) { + eewr = ((offset + i) << IXGBE_EEPROM_RW_ADDR_SHIFT) | + (data[i] << IXGBE_EEPROM_RW_REG_DATA) | + IXGBE_EEPROM_RW_REG_START; + + status = ixgbe_poll_eerd_eewr_done(hw, IXGBE_NVM_POLL_WRITE); + if (status != 0) { + hw_dbg(hw, "Eeprom write EEWR timed out\n"); + goto out; + } + + IXGBE_WRITE_REG(hw, IXGBE_EEWR, eewr); + + status = ixgbe_poll_eerd_eewr_done(hw, IXGBE_NVM_POLL_WRITE); + if (status != 0) { + hw_dbg(hw, "Eeprom write EEWR timed out\n"); + goto out; + } + } + +out: + return status; +} + +/** + * ixgbe_write_eewr_generic - Write EEPROM word using EEWR + * @hw: pointer to hardware structure + * @offset: offset of word in the EEPROM to write + * @data: word write to the EEPROM + * + * Write a 16 bit word to the EEPROM using the EEWR register. + **/ +s32 ixgbe_write_eewr_generic(struct ixgbe_hw *hw, u16 offset, u16 data) +{ + return ixgbe_write_eewr_buffer_generic(hw, offset, 1, &data); +} + +/** + * ixgbe_poll_eerd_eewr_done - Poll EERD read or EEWR write status + * @hw: pointer to hardware structure + * @ee_reg: EEPROM flag for polling + * + * Polls the status bit (bit 1) of the EERD or EEWR to determine when the + * read or write is done respectively. + **/ +s32 ixgbe_poll_eerd_eewr_done(struct ixgbe_hw *hw, u32 ee_reg) +{ + u32 i; + u32 reg; + s32 status = IXGBE_ERR_EEPROM; + + for (i = 0; i < IXGBE_EERD_EEWR_ATTEMPTS; i++) { + if (ee_reg == IXGBE_NVM_POLL_READ) + reg = IXGBE_READ_REG(hw, IXGBE_EERD); + else + reg = IXGBE_READ_REG(hw, IXGBE_EEWR); + + if (reg & IXGBE_EEPROM_RW_REG_DONE) { + status = 0; + break; + } + udelay(5); + } + return status; +} + +/** + * ixgbe_acquire_eeprom - Acquire EEPROM using bit-bang + * @hw: pointer to hardware structure + * + * Prepares EEPROM for access using bit-bang method. This function should + * be called before issuing a command to the EEPROM. + **/ +static s32 ixgbe_acquire_eeprom(struct ixgbe_hw *hw) +{ + s32 status = 0; + u32 eec; + u32 i; + + if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) + != 0) + status = IXGBE_ERR_SWFW_SYNC; + + if (status == 0) { + eec = IXGBE_READ_REG(hw, IXGBE_EEC); + + /* Request EEPROM Access */ + eec |= IXGBE_EEC_REQ; + IXGBE_WRITE_REG(hw, IXGBE_EEC, eec); + + for (i = 0; i < IXGBE_EEPROM_GRANT_ATTEMPTS; i++) { + eec = IXGBE_READ_REG(hw, IXGBE_EEC); + if (eec & IXGBE_EEC_GNT) + break; + udelay(5); + } + + /* Release if grant not acquired */ + if (!(eec & IXGBE_EEC_GNT)) { + eec &= ~IXGBE_EEC_REQ; + IXGBE_WRITE_REG(hw, IXGBE_EEC, eec); + hw_dbg(hw, "Could not acquire EEPROM grant\n"); + + hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM); + status = IXGBE_ERR_EEPROM; + } + + /* Setup EEPROM for Read/Write */ + if (status == 0) { + /* Clear CS and SK */ + eec &= ~(IXGBE_EEC_CS | IXGBE_EEC_SK); + IXGBE_WRITE_REG(hw, IXGBE_EEC, eec); + IXGBE_WRITE_FLUSH(hw); + udelay(1); + } + } + return status; +} + +/** + * ixgbe_get_eeprom_semaphore - Get hardware semaphore + * @hw: pointer to hardware structure + * + * Sets the hardware semaphores so EEPROM access can occur for bit-bang method + **/ +static s32 ixgbe_get_eeprom_semaphore(struct ixgbe_hw *hw) +{ + s32 status = IXGBE_ERR_EEPROM; + u32 timeout = 2000; + u32 i; + u32 swsm; + + /* Get SMBI software semaphore between device drivers first */ + for (i = 0; i < timeout; i++) { + /* + * If the SMBI bit is 0 when we read it, then the bit will be + * set and we have the semaphore + */ + swsm = IXGBE_READ_REG(hw, IXGBE_SWSM); + if (!(swsm & IXGBE_SWSM_SMBI)) { + status = 0; + break; + } + udelay(50); + } + + if (i == timeout) { + hw_dbg(hw, "Driver can't access the Eeprom - SMBI Semaphore " + "not granted.\n"); + /* + * this release is particularly important because our attempts + * above to get the semaphore may have succeeded, and if there + * was a timeout, we should unconditionally clear the semaphore + * bits to free the driver to make progress + */ + ixgbe_release_eeprom_semaphore(hw); + + udelay(50); + /* + * one last try + * If the SMBI bit is 0 when we read it, then the bit will be + * set and we have the semaphore + */ + swsm = IXGBE_READ_REG(hw, IXGBE_SWSM); + if (!(swsm & IXGBE_SWSM_SMBI)) + status = 0; + } + + /* Now get the semaphore between SW/FW through the SWESMBI bit */ + if (status == 0) { + for (i = 0; i < timeout; i++) { + swsm = IXGBE_READ_REG(hw, IXGBE_SWSM); + + /* Set the SW EEPROM semaphore bit to request access */ + swsm |= IXGBE_SWSM_SWESMBI; + IXGBE_WRITE_REG(hw, IXGBE_SWSM, swsm); + + /* + * If we set the bit successfully then we got the + * semaphore. + */ + swsm = IXGBE_READ_REG(hw, IXGBE_SWSM); + if (swsm & IXGBE_SWSM_SWESMBI) + break; + + udelay(50); + } + + /* + * Release semaphores and return error if SW EEPROM semaphore + * was not granted because we don't have access to the EEPROM + */ + if (i >= timeout) { + hw_dbg(hw, "SWESMBI Software EEPROM semaphore " + "not granted.\n"); + ixgbe_release_eeprom_semaphore(hw); + status = IXGBE_ERR_EEPROM; + } + } else { + hw_dbg(hw, "Software semaphore SMBI between device drivers " + "not granted.\n"); + } + + return status; +} + +/** + * ixgbe_release_eeprom_semaphore - Release hardware semaphore + * @hw: pointer to hardware structure + * + * This function clears hardware semaphore bits. + **/ +static void ixgbe_release_eeprom_semaphore(struct ixgbe_hw *hw) +{ + u32 swsm; + + swsm = IXGBE_READ_REG(hw, IXGBE_SWSM); + + /* Release both semaphores by writing 0 to the bits SWESMBI and SMBI */ + swsm &= ~(IXGBE_SWSM_SWESMBI | IXGBE_SWSM_SMBI); + IXGBE_WRITE_REG(hw, IXGBE_SWSM, swsm); + IXGBE_WRITE_FLUSH(hw); +} + +/** + * ixgbe_ready_eeprom - Polls for EEPROM ready + * @hw: pointer to hardware structure + **/ +static s32 ixgbe_ready_eeprom(struct ixgbe_hw *hw) +{ + s32 status = 0; + u16 i; + u8 spi_stat_reg; + + /* + * Read "Status Register" repeatedly until the LSB is cleared. The + * EEPROM will signal that the command has been completed by clearing + * bit 0 of the internal status register. If it's not cleared within + * 5 milliseconds, then error out. + */ + for (i = 0; i < IXGBE_EEPROM_MAX_RETRY_SPI; i += 5) { + ixgbe_shift_out_eeprom_bits(hw, IXGBE_EEPROM_RDSR_OPCODE_SPI, + IXGBE_EEPROM_OPCODE_BITS); + spi_stat_reg = (u8)ixgbe_shift_in_eeprom_bits(hw, 8); + if (!(spi_stat_reg & IXGBE_EEPROM_STATUS_RDY_SPI)) + break; + + udelay(5); + ixgbe_standby_eeprom(hw); + }; + + /* + * On some parts, SPI write time could vary from 0-20mSec on 3.3V + * devices (and only 0-5mSec on 5V devices) + */ + if (i >= IXGBE_EEPROM_MAX_RETRY_SPI) { + hw_dbg(hw, "SPI EEPROM Status error\n"); + status = IXGBE_ERR_EEPROM; + } + + return status; +} + +/** + * ixgbe_standby_eeprom - Returns EEPROM to a "standby" state + * @hw: pointer to hardware structure + **/ +static void ixgbe_standby_eeprom(struct ixgbe_hw *hw) +{ + u32 eec; + + eec = IXGBE_READ_REG(hw, IXGBE_EEC); + + /* Toggle CS to flush commands */ + eec |= IXGBE_EEC_CS; + IXGBE_WRITE_REG(hw, IXGBE_EEC, eec); + IXGBE_WRITE_FLUSH(hw); + udelay(1); + eec &= ~IXGBE_EEC_CS; + IXGBE_WRITE_REG(hw, IXGBE_EEC, eec); + IXGBE_WRITE_FLUSH(hw); + udelay(1); +} + +/** + * ixgbe_shift_out_eeprom_bits - Shift data bits out to the EEPROM. + * @hw: pointer to hardware structure + * @data: data to send to the EEPROM + * @count: number of bits to shift out + **/ +static void ixgbe_shift_out_eeprom_bits(struct ixgbe_hw *hw, u16 data, + u16 count) +{ + u32 eec; + u32 mask; + u32 i; + + eec = IXGBE_READ_REG(hw, IXGBE_EEC); + + /* + * Mask is used to shift "count" bits of "data" out to the EEPROM + * one bit at a time. Determine the starting bit based on count + */ + mask = 0x01 << (count - 1); + + for (i = 0; i < count; i++) { + /* + * A "1" is shifted out to the EEPROM by setting bit "DI" to a + * "1", and then raising and then lowering the clock (the SK + * bit controls the clock input to the EEPROM). A "0" is + * shifted out to the EEPROM by setting "DI" to "0" and then + * raising and then lowering the clock. + */ + if (data & mask) + eec |= IXGBE_EEC_DI; + else + eec &= ~IXGBE_EEC_DI; + + IXGBE_WRITE_REG(hw, IXGBE_EEC, eec); + IXGBE_WRITE_FLUSH(hw); + + udelay(1); + + ixgbe_raise_eeprom_clk(hw, &eec); + ixgbe_lower_eeprom_clk(hw, &eec); + + /* + * Shift mask to signify next bit of data to shift in to the + * EEPROM + */ + mask = mask >> 1; + }; + + /* We leave the "DI" bit set to "0" when we leave this routine. */ + eec &= ~IXGBE_EEC_DI; + IXGBE_WRITE_REG(hw, IXGBE_EEC, eec); + IXGBE_WRITE_FLUSH(hw); +} + +/** + * ixgbe_shift_in_eeprom_bits - Shift data bits in from the EEPROM + * @hw: pointer to hardware structure + **/ +static u16 ixgbe_shift_in_eeprom_bits(struct ixgbe_hw *hw, u16 count) +{ + u32 eec; + u32 i; + u16 data = 0; + + /* + * In order to read a register from the EEPROM, we need to shift + * 'count' bits in from the EEPROM. Bits are "shifted in" by raising + * the clock input to the EEPROM (setting the SK bit), and then reading + * the value of the "DO" bit. During this "shifting in" process the + * "DI" bit should always be clear. + */ + eec = IXGBE_READ_REG(hw, IXGBE_EEC); + + eec &= ~(IXGBE_EEC_DO | IXGBE_EEC_DI); + + for (i = 0; i < count; i++) { + data = data << 1; + ixgbe_raise_eeprom_clk(hw, &eec); + + eec = IXGBE_READ_REG(hw, IXGBE_EEC); + + eec &= ~(IXGBE_EEC_DI); + if (eec & IXGBE_EEC_DO) + data |= 1; + + ixgbe_lower_eeprom_clk(hw, &eec); + } + + return data; +} + +/** + * ixgbe_raise_eeprom_clk - Raises the EEPROM's clock input. + * @hw: pointer to hardware structure + * @eec: EEC register's current value + **/ +static void ixgbe_raise_eeprom_clk(struct ixgbe_hw *hw, u32 *eec) +{ + /* + * Raise the clock input to the EEPROM + * (setting the SK bit), then delay + */ + *eec = *eec | IXGBE_EEC_SK; + IXGBE_WRITE_REG(hw, IXGBE_EEC, *eec); + IXGBE_WRITE_FLUSH(hw); + udelay(1); +} + +/** + * ixgbe_lower_eeprom_clk - Lowers the EEPROM's clock input. + * @hw: pointer to hardware structure + * @eecd: EECD's current value + **/ +static void ixgbe_lower_eeprom_clk(struct ixgbe_hw *hw, u32 *eec) +{ + /* + * Lower the clock input to the EEPROM (clearing the SK bit), then + * delay + */ + *eec = *eec & ~IXGBE_EEC_SK; + IXGBE_WRITE_REG(hw, IXGBE_EEC, *eec); + IXGBE_WRITE_FLUSH(hw); + udelay(1); +} + +/** + * ixgbe_release_eeprom - Release EEPROM, release semaphores + * @hw: pointer to hardware structure + **/ +static void ixgbe_release_eeprom(struct ixgbe_hw *hw) +{ + u32 eec; + + eec = IXGBE_READ_REG(hw, IXGBE_EEC); + + eec |= IXGBE_EEC_CS; /* Pull CS high */ + eec &= ~IXGBE_EEC_SK; /* Lower SCK */ + + IXGBE_WRITE_REG(hw, IXGBE_EEC, eec); + IXGBE_WRITE_FLUSH(hw); + + udelay(1); + + /* Stop requesting EEPROM access */ + eec &= ~IXGBE_EEC_REQ; + IXGBE_WRITE_REG(hw, IXGBE_EEC, eec); + + hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM); + + /* Delay before attempt to obtain semaphore again to allow FW access */ + msleep(hw->eeprom.semaphore_delay); +} + +/** + * ixgbe_calc_eeprom_checksum_generic - Calculates and returns the checksum + * @hw: pointer to hardware structure + **/ +u16 ixgbe_calc_eeprom_checksum_generic(struct ixgbe_hw *hw) +{ + u16 i; + u16 j; + u16 checksum = 0; + u16 length = 0; + u16 pointer = 0; + u16 word = 0; + + /* Include 0x0-0x3F in the checksum */ + for (i = 0; i < IXGBE_EEPROM_CHECKSUM; i++) { + if (hw->eeprom.ops.read(hw, i, &word) != 0) { + hw_dbg(hw, "EEPROM read failed\n"); + break; + } + checksum += word; + } + + /* Include all data from pointers except for the fw pointer */ + for (i = IXGBE_PCIE_ANALOG_PTR; i < IXGBE_FW_PTR; i++) { + hw->eeprom.ops.read(hw, i, &pointer); + + /* Make sure the pointer seems valid */ + if (pointer != 0xFFFF && pointer != 0) { + hw->eeprom.ops.read(hw, pointer, &length); + + if (length != 0xFFFF && length != 0) { + for (j = pointer+1; j <= pointer+length; j++) { + hw->eeprom.ops.read(hw, j, &word); + checksum += word; + } + } + } + } + + checksum = (u16)IXGBE_EEPROM_SUM - checksum; + + return checksum; +} + +/** + * ixgbe_validate_eeprom_checksum_generic - Validate EEPROM checksum + * @hw: pointer to hardware structure + * @checksum_val: calculated checksum + * + * Performs checksum calculation and validates the EEPROM checksum. If the + * caller does not need checksum_val, the value can be NULL. + **/ +s32 ixgbe_validate_eeprom_checksum_generic(struct ixgbe_hw *hw, + u16 *checksum_val) +{ + s32 status; + u16 checksum; + u16 read_checksum = 0; + + /* + * Read the first word from the EEPROM. If this times out or fails, do + * not continue or we could be in for a very long wait while every + * EEPROM read fails + */ + status = hw->eeprom.ops.read(hw, 0, &checksum); + + if (status == 0) { + checksum = hw->eeprom.ops.calc_checksum(hw); + + hw->eeprom.ops.read(hw, IXGBE_EEPROM_CHECKSUM, &read_checksum); + + /* + * Verify read checksum from EEPROM is the same as + * calculated checksum + */ + if (read_checksum != checksum) + status = IXGBE_ERR_EEPROM_CHECKSUM; + + /* If the user cares, return the calculated checksum */ + if (checksum_val) + *checksum_val = checksum; + } else { + hw_dbg(hw, "EEPROM read failed\n"); + } + + return status; +} + +/** + * ixgbe_update_eeprom_checksum_generic - Updates the EEPROM checksum + * @hw: pointer to hardware structure + **/ +s32 ixgbe_update_eeprom_checksum_generic(struct ixgbe_hw *hw) +{ + s32 status; + u16 checksum; + + /* + * Read the first word from the EEPROM. If this times out or fails, do + * not continue or we could be in for a very long wait while every + * EEPROM read fails + */ + status = hw->eeprom.ops.read(hw, 0, &checksum); + + if (status == 0) { + checksum = hw->eeprom.ops.calc_checksum(hw); + status = hw->eeprom.ops.write(hw, IXGBE_EEPROM_CHECKSUM, + checksum); + } else { + hw_dbg(hw, "EEPROM read failed\n"); + } + + return status; +} + +/** + * ixgbe_validate_mac_addr - Validate MAC address + * @mac_addr: pointer to MAC address. + * + * Tests a MAC address to ensure it is a valid Individual Address + **/ +s32 ixgbe_validate_mac_addr(u8 *mac_addr) +{ + s32 status = 0; + + /* Make sure it is not a multicast address */ + if (IXGBE_IS_MULTICAST(mac_addr)) { + hw_dbg(hw, "MAC address is multicast\n"); + status = IXGBE_ERR_INVALID_MAC_ADDR; + /* Not a broadcast address */ + } else if (IXGBE_IS_BROADCAST(mac_addr)) { + hw_dbg(hw, "MAC address is broadcast\n"); + status = IXGBE_ERR_INVALID_MAC_ADDR; + /* Reject the zero address */ + } else if (mac_addr[0] == 0 && mac_addr[1] == 0 && mac_addr[2] == 0 && + mac_addr[3] == 0 && mac_addr[4] == 0 && mac_addr[5] == 0) { + hw_dbg(hw, "MAC address is all zeros\n"); + status = IXGBE_ERR_INVALID_MAC_ADDR; + } + return status; +} + +/** + * ixgbe_set_rar_generic - Set Rx address register + * @hw: pointer to hardware structure + * @index: Receive address register to write + * @addr: Address to put into receive address register + * @vmdq: VMDq "set" or "pool" index + * @enable_addr: set flag that address is active + * + * Puts an ethernet address into a receive address register. + **/ +s32 ixgbe_set_rar_generic(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq, + u32 enable_addr) +{ + u32 rar_low, rar_high; + u32 rar_entries = hw->mac.num_rar_entries; + + /* Make sure we are using a valid rar index range */ + if (index >= rar_entries) { + hw_dbg(hw, "RAR index %d is out of range.\n", index); + return IXGBE_ERR_INVALID_ARGUMENT; + } + + /* setup VMDq pool selection before this RAR gets enabled */ + hw->mac.ops.set_vmdq(hw, index, vmdq); + + /* + * HW expects these in little endian so we reverse the byte + * order from network order (big endian) to little endian + */ + rar_low = ((u32)addr[0] | + ((u32)addr[1] << 8) | + ((u32)addr[2] << 16) | + ((u32)addr[3] << 24)); + /* + * Some parts put the VMDq setting in the extra RAH bits, + * so save everything except the lower 16 bits that hold part + * of the address and the address valid bit. + */ + rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(index)); + rar_high &= ~(0x0000FFFF | IXGBE_RAH_AV); + rar_high |= ((u32)addr[4] | ((u32)addr[5] << 8)); + + if (enable_addr != 0) + rar_high |= IXGBE_RAH_AV; + + IXGBE_WRITE_REG(hw, IXGBE_RAL(index), rar_low); + IXGBE_WRITE_REG(hw, IXGBE_RAH(index), rar_high); + + return 0; +} + +/** + * ixgbe_clear_rar_generic - Remove Rx address register + * @hw: pointer to hardware structure + * @index: Receive address register to write + * + * Clears an ethernet address from a receive address register. + **/ +s32 ixgbe_clear_rar_generic(struct ixgbe_hw *hw, u32 index) +{ + u32 rar_high; + u32 rar_entries = hw->mac.num_rar_entries; + + /* Make sure we are using a valid rar index range */ + if (index >= rar_entries) { + hw_dbg(hw, "RAR index %d is out of range.\n", index); + return IXGBE_ERR_INVALID_ARGUMENT; + } + + /* + * Some parts put the VMDq setting in the extra RAH bits, + * so save everything except the lower 16 bits that hold part + * of the address and the address valid bit. + */ + rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(index)); + rar_high &= ~(0x0000FFFF | IXGBE_RAH_AV); + + IXGBE_WRITE_REG(hw, IXGBE_RAL(index), 0); + IXGBE_WRITE_REG(hw, IXGBE_RAH(index), rar_high); + + /* clear VMDq pool/queue selection for this RAR */ + hw->mac.ops.clear_vmdq(hw, index, IXGBE_CLEAR_VMDQ_ALL); + + return 0; +} + +/** + * ixgbe_init_rx_addrs_generic - Initializes receive address filters. + * @hw: pointer to hardware structure + * + * Places the MAC address in receive address register 0 and clears the rest + * of the receive address registers. Clears the multicast table. Assumes + * the receiver is in reset when the routine is called. + **/ +s32 ixgbe_init_rx_addrs_generic(struct ixgbe_hw *hw) +{ + u32 i; + u32 rar_entries = hw->mac.num_rar_entries; + + /* + * If the current mac address is valid, assume it is a software override + * to the permanent address. + * Otherwise, use the permanent address from the eeprom. + */ + if (ixgbe_validate_mac_addr(hw->mac.addr) == + IXGBE_ERR_INVALID_MAC_ADDR) { + /* Get the MAC address from the RAR0 for later reference */ + hw->mac.ops.get_mac_addr(hw, hw->mac.addr); + + hw_dbg(hw, " Keeping Current RAR0 Addr =%.2X %.2X %.2X ", + hw->mac.addr[0], hw->mac.addr[1], + hw->mac.addr[2]); + hw_dbg(hw, "%.2X %.2X %.2X\n", hw->mac.addr[3], + hw->mac.addr[4], hw->mac.addr[5]); + } else { + /* Setup the receive address. */ + hw_dbg(hw, "Overriding MAC Address in RAR[0]\n"); + hw_dbg(hw, " New MAC Addr =%.2X %.2X %.2X ", + hw->mac.addr[0], hw->mac.addr[1], + hw->mac.addr[2]); + hw_dbg(hw, "%.2X %.2X %.2X\n", hw->mac.addr[3], + hw->mac.addr[4], hw->mac.addr[5]); + + hw->mac.ops.set_rar(hw, 0, hw->mac.addr, 0, IXGBE_RAH_AV); + + /* clear VMDq pool/queue selection for RAR 0 */ + hw->mac.ops.clear_vmdq(hw, 0, IXGBE_CLEAR_VMDQ_ALL); + } + hw->addr_ctrl.overflow_promisc = 0; + + hw->addr_ctrl.rar_used_count = 1; + + /* Zero out the other receive addresses. */ + hw_dbg(hw, "Clearing RAR[1-%d]\n", rar_entries - 1); + for (i = 1; i < rar_entries; i++) { + IXGBE_WRITE_REG(hw, IXGBE_RAL(i), 0); + IXGBE_WRITE_REG(hw, IXGBE_RAH(i), 0); + } + + /* Clear the MTA */ + hw->addr_ctrl.mta_in_use = 0; + IXGBE_WRITE_REG(hw, IXGBE_MCSTCTRL, hw->mac.mc_filter_type); + + hw_dbg(hw, " Clearing MTA\n"); + for (i = 0; i < hw->mac.mcft_size; i++) + IXGBE_WRITE_REG(hw, IXGBE_MTA(i), 0); + + ixgbe_init_uta_tables(hw); + + return 0; +} + +/** + * ixgbe_add_uc_addr - Adds a secondary unicast address. + * @hw: pointer to hardware structure + * @addr: new address + * + * Adds it to unused receive address register or goes into promiscuous mode. + **/ +void ixgbe_add_uc_addr(struct ixgbe_hw *hw, u8 *addr, u32 vmdq) +{ + u32 rar_entries = hw->mac.num_rar_entries; + u32 rar; + + hw_dbg(hw, " UC Addr = %.2X %.2X %.2X %.2X %.2X %.2X\n", + addr[0], addr[1], addr[2], addr[3], addr[4], addr[5]); + + /* + * Place this address in the RAR if there is room, + * else put the controller into promiscuous mode + */ + if (hw->addr_ctrl.rar_used_count < rar_entries) { + rar = hw->addr_ctrl.rar_used_count; + hw->mac.ops.set_rar(hw, rar, addr, vmdq, IXGBE_RAH_AV); + hw_dbg(hw, "Added a secondary address to RAR[%d]\n", rar); + hw->addr_ctrl.rar_used_count++; + } else { + hw->addr_ctrl.overflow_promisc++; + } + + hw_dbg(hw, "ixgbe_add_uc_addr Complete\n"); +} + +/** + * ixgbe_update_uc_addr_list_generic - Updates MAC list of secondary addresses + * @hw: pointer to hardware structure + * @addr_list: the list of new addresses + * @addr_count: number of addresses + * @next: iterator function to walk the address list + * + * The given list replaces any existing list. Clears the secondary addrs from + * receive address registers. Uses unused receive address registers for the + * first secondary addresses, and falls back to promiscuous mode as needed. + * + * Drivers using secondary unicast addresses must set user_set_promisc when + * manually putting the device into promiscuous mode. + **/ +s32 ixgbe_update_uc_addr_list_generic(struct ixgbe_hw *hw, u8 *addr_list, + u32 addr_count, ixgbe_mc_addr_itr next) +{ + u8 *addr; + u32 i; + u32 old_promisc_setting = hw->addr_ctrl.overflow_promisc; + u32 uc_addr_in_use; + u32 fctrl; + u32 vmdq; + + /* + * Clear accounting of old secondary address list, + * don't count RAR[0] + */ + uc_addr_in_use = hw->addr_ctrl.rar_used_count - 1; + hw->addr_ctrl.rar_used_count -= uc_addr_in_use; + hw->addr_ctrl.overflow_promisc = 0; + + /* Zero out the other receive addresses */ + hw_dbg(hw, "Clearing RAR[1-%d]\n", uc_addr_in_use+1); + for (i = 0; i < uc_addr_in_use; i++) { + IXGBE_WRITE_REG(hw, IXGBE_RAL(1+i), 0); + IXGBE_WRITE_REG(hw, IXGBE_RAH(1+i), 0); + } + + /* Add the new addresses */ + for (i = 0; i < addr_count; i++) { + hw_dbg(hw, " Adding the secondary addresses:\n"); + addr = next(hw, &addr_list, &vmdq); + ixgbe_add_uc_addr(hw, addr, vmdq); + } + + if (hw->addr_ctrl.overflow_promisc) { + /* enable promisc if not already in overflow or set by user */ + if (!old_promisc_setting && !hw->addr_ctrl.user_set_promisc) { + hw_dbg(hw, " Entering address overflow promisc mode\n"); + fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL); + fctrl |= IXGBE_FCTRL_UPE; + IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl); + } + } else { + /* only disable if set by overflow, not by user */ + if (old_promisc_setting && !hw->addr_ctrl.user_set_promisc) { + hw_dbg(hw, " Leaving address overflow promisc mode\n"); + fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL); + fctrl &= ~IXGBE_FCTRL_UPE; + IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl); + } + } + + hw_dbg(hw, "ixgbe_update_uc_addr_list_generic Complete\n"); + return 0; +} + +/** + * ixgbe_mta_vector - Determines bit-vector in multicast table to set + * @hw: pointer to hardware structure + * @mc_addr: the multicast address + * + * Extracts the 12 bits, from a multicast address, to determine which + * bit-vector to set in the multicast table. The hardware uses 12 bits, from + * incoming rx multicast addresses, to determine the bit-vector to check in + * the MTA. Which of the 4 combination, of 12-bits, the hardware uses is set + * by the MO field of the MCSTCTRL. The MO field is set during initialization + * to mc_filter_type. + **/ +static s32 ixgbe_mta_vector(struct ixgbe_hw *hw, u8 *mc_addr) +{ + u32 vector = 0; + + switch (hw->mac.mc_filter_type) { + case 0: /* use bits [47:36] of the address */ + vector = ((mc_addr[4] >> 4) | (((u16)mc_addr[5]) << 4)); + break; + case 1: /* use bits [46:35] of the address */ + vector = ((mc_addr[4] >> 3) | (((u16)mc_addr[5]) << 5)); + break; + case 2: /* use bits [45:34] of the address */ + vector = ((mc_addr[4] >> 2) | (((u16)mc_addr[5]) << 6)); + break; + case 3: /* use bits [43:32] of the address */ + vector = ((mc_addr[4]) | (((u16)mc_addr[5]) << 8)); + break; + default: /* Invalid mc_filter_type */ + hw_dbg(hw, "MC filter type param set incorrectly\n"); + break; + } + + /* vector can only be 12-bits or boundary will be exceeded */ + vector &= 0xFFF; + return vector; +} + +/** + * ixgbe_set_mta - Set bit-vector in multicast table + * @hw: pointer to hardware structure + * @hash_value: Multicast address hash value + * + * Sets the bit-vector in the multicast table. + **/ +void ixgbe_set_mta(struct ixgbe_hw *hw, u8 *mc_addr) +{ + u32 vector; + u32 vector_bit; + u32 vector_reg; + + hw->addr_ctrl.mta_in_use++; + + vector = ixgbe_mta_vector(hw, mc_addr); + hw_dbg(hw, " bit-vector = 0x%03X\n", vector); + + /* + * The MTA is a register array of 128 32-bit registers. It is treated + * like an array of 4096 bits. We want to set bit + * BitArray[vector_value]. So we figure out what register the bit is + * in, read it, OR in the new bit, then write back the new value. The + * register is determined by the upper 7 bits of the vector value and + * the bit within that register are determined by the lower 5 bits of + * the value. + */ + vector_reg = (vector >> 5) & 0x7F; + vector_bit = vector & 0x1F; + hw->mac.mta_shadow[vector_reg] |= (1 << vector_bit); +} + +/** + * ixgbe_update_mc_addr_list_generic - Updates MAC list of multicast addresses + * @hw: pointer to hardware structure + * @mc_addr_list: the list of new multicast addresses + * @mc_addr_count: number of addresses + * @next: iterator function to walk the multicast address list + * @clear: flag, when set clears the table beforehand + * + * When the clear flag is set, the given list replaces any existing list. + * Hashes the given addresses into the multicast table. + **/ +s32 ixgbe_update_mc_addr_list_generic(struct ixgbe_hw *hw, u8 *mc_addr_list, + u32 mc_addr_count, ixgbe_mc_addr_itr next, + bool clear) +{ + u32 i; + u32 vmdq; + + /* + * Set the new number of MC addresses that we are being requested to + * use. + */ + hw->addr_ctrl.num_mc_addrs = mc_addr_count; + hw->addr_ctrl.mta_in_use = 0; + + /* Clear mta_shadow */ + if (clear) { + hw_dbg(hw, " Clearing MTA\n"); + memset(&hw->mac.mta_shadow, 0, sizeof(hw->mac.mta_shadow)); + } + + /* Update mta_shadow */ + for (i = 0; i < mc_addr_count; i++) { + hw_dbg(hw, " Adding the multicast addresses:\n"); + ixgbe_set_mta(hw, next(hw, &mc_addr_list, &vmdq)); + } + + /* Enable mta */ + for (i = 0; i < hw->mac.mcft_size; i++) + IXGBE_WRITE_REG_ARRAY(hw, IXGBE_MTA(0), i, + hw->mac.mta_shadow[i]); + + if (hw->addr_ctrl.mta_in_use > 0) + IXGBE_WRITE_REG(hw, IXGBE_MCSTCTRL, + IXGBE_MCSTCTRL_MFE | hw->mac.mc_filter_type); + + hw_dbg(hw, "ixgbe_update_mc_addr_list_generic Complete\n"); + return 0; +} + +/** + * ixgbe_enable_mc_generic - Enable multicast address in RAR + * @hw: pointer to hardware structure + * + * Enables multicast address in RAR and the use of the multicast hash table. + **/ +s32 ixgbe_enable_mc_generic(struct ixgbe_hw *hw) +{ + struct ixgbe_addr_filter_info *a = &hw->addr_ctrl; + + if (a->mta_in_use > 0) + IXGBE_WRITE_REG(hw, IXGBE_MCSTCTRL, IXGBE_MCSTCTRL_MFE | + hw->mac.mc_filter_type); + + return 0; +} + +/** + * ixgbe_disable_mc_generic - Disable multicast address in RAR + * @hw: pointer to hardware structure + * + * Disables multicast address in RAR and the use of the multicast hash table. + **/ +s32 ixgbe_disable_mc_generic(struct ixgbe_hw *hw) +{ + struct ixgbe_addr_filter_info *a = &hw->addr_ctrl; + + if (a->mta_in_use > 0) + IXGBE_WRITE_REG(hw, IXGBE_MCSTCTRL, hw->mac.mc_filter_type); + + return 0; +} + +/** + * ixgbe_fc_enable_generic - Enable flow control + * @hw: pointer to hardware structure + * + * Enable flow control according to the current settings. + **/ +s32 ixgbe_fc_enable_generic(struct ixgbe_hw *hw) +{ + s32 ret_val = 0; + u32 mflcn_reg, fccfg_reg; + u32 reg; + u32 fcrtl, fcrth; + int i; + + /* Validate the water mark configuration */ + if (!hw->fc.pause_time) { + ret_val = IXGBE_ERR_INVALID_LINK_SETTINGS; + goto out; + } + + /* Low water mark of zero causes XOFF floods */ + for (i = 0; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) { + if ((hw->fc.current_mode & ixgbe_fc_tx_pause) && + hw->fc.high_water[i]) { + if (!hw->fc.low_water[i] || + hw->fc.low_water[i] >= hw->fc.high_water[i]) { + hw_dbg(hw, "Invalid water mark configuration\n"); + ret_val = IXGBE_ERR_INVALID_LINK_SETTINGS; + goto out; + } + } + } + + /* Negotiate the fc mode to use */ + ixgbe_fc_autoneg(hw); + + /* Disable any previous flow control settings */ + mflcn_reg = IXGBE_READ_REG(hw, IXGBE_MFLCN); + mflcn_reg &= ~(IXGBE_MFLCN_RPFCE_MASK | IXGBE_MFLCN_RFCE); + + fccfg_reg = IXGBE_READ_REG(hw, IXGBE_FCCFG); + fccfg_reg &= ~(IXGBE_FCCFG_TFCE_802_3X | IXGBE_FCCFG_TFCE_PRIORITY); + + /* + * The possible values of fc.current_mode are: + * 0: Flow control is completely disabled + * 1: Rx flow control is enabled (we can receive pause frames, + * but not send pause frames). + * 2: Tx flow control is enabled (we can send pause frames but + * we do not support receiving pause frames). + * 3: Both Rx and Tx flow control (symmetric) are enabled. + * other: Invalid. + */ + switch (hw->fc.current_mode) { + case ixgbe_fc_none: + /* + * Flow control is disabled by software override or autoneg. + * The code below will actually disable it in the HW. + */ + break; + case ixgbe_fc_rx_pause: + /* + * Rx Flow control is enabled and Tx Flow control is + * disabled by software override. Since there really + * isn't a way to advertise that we are capable of RX + * Pause ONLY, we will advertise that we support both + * symmetric and asymmetric Rx PAUSE. Later, we will + * disable the adapter's ability to send PAUSE frames. + */ + mflcn_reg |= IXGBE_MFLCN_RFCE; + break; + case ixgbe_fc_tx_pause: + /* + * Tx Flow control is enabled, and Rx Flow control is + * disabled by software override. + */ + fccfg_reg |= IXGBE_FCCFG_TFCE_802_3X; + break; + case ixgbe_fc_full: + /* Flow control (both Rx and Tx) is enabled by SW override. */ + mflcn_reg |= IXGBE_MFLCN_RFCE; + fccfg_reg |= IXGBE_FCCFG_TFCE_802_3X; + break; + default: + hw_dbg(hw, "Flow control param set incorrectly\n"); + ret_val = IXGBE_ERR_CONFIG; + goto out; + break; + } + + /* Set 802.3x based flow control settings. */ + mflcn_reg |= IXGBE_MFLCN_DPF; + IXGBE_WRITE_REG(hw, IXGBE_MFLCN, mflcn_reg); + IXGBE_WRITE_REG(hw, IXGBE_FCCFG, fccfg_reg); + + + /* Set up and enable Rx high/low water mark thresholds, enable XON. */ + for (i = 0; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) { + if ((hw->fc.current_mode & ixgbe_fc_tx_pause) && + hw->fc.high_water[i]) { + fcrtl = (hw->fc.low_water[i] << 10) | IXGBE_FCRTL_XONE; + IXGBE_WRITE_REG(hw, IXGBE_FCRTL_82599(i), fcrtl); + fcrth = (hw->fc.high_water[i] << 10) | IXGBE_FCRTH_FCEN; + } else { + IXGBE_WRITE_REG(hw, IXGBE_FCRTL_82599(i), 0); + /* + * In order to prevent Tx hangs when the internal Tx + * switch is enabled we must set the high water mark + * to the maximum FCRTH value. This allows the Tx + * switch to function even under heavy Rx workloads. + */ + fcrth = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i)) - 32; + } + + IXGBE_WRITE_REG(hw, IXGBE_FCRTH_82599(i), fcrth); + } + + /* Configure pause time (2 TCs per register) */ + reg = hw->fc.pause_time * 0x00010001; + for (i = 0; i < (IXGBE_DCB_MAX_TRAFFIC_CLASS / 2); i++) + IXGBE_WRITE_REG(hw, IXGBE_FCTTV(i), reg); + + /* Configure flow control refresh threshold value */ + IXGBE_WRITE_REG(hw, IXGBE_FCRTV, hw->fc.pause_time / 2); + +out: + return ret_val; +} + +/** + * ixgbe_negotiate_fc - Negotiate flow control + * @hw: pointer to hardware structure + * @adv_reg: flow control advertised settings + * @lp_reg: link partner's flow control settings + * @adv_sym: symmetric pause bit in advertisement + * @adv_asm: asymmetric pause bit in advertisement + * @lp_sym: symmetric pause bit in link partner advertisement + * @lp_asm: asymmetric pause bit in link partner advertisement + * + * Find the intersection between advertised settings and link partner's + * advertised settings + **/ +static s32 ixgbe_negotiate_fc(struct ixgbe_hw *hw, u32 adv_reg, u32 lp_reg, + u32 adv_sym, u32 adv_asm, u32 lp_sym, u32 lp_asm) +{ + if ((!(adv_reg)) || (!(lp_reg))) + return IXGBE_ERR_FC_NOT_NEGOTIATED; + + if ((adv_reg & adv_sym) && (lp_reg & lp_sym)) { + /* + * Now we need to check if the user selected Rx ONLY + * of pause frames. In this case, we had to advertise + * FULL flow control because we could not advertise RX + * ONLY. Hence, we must now check to see if we need to + * turn OFF the TRANSMISSION of PAUSE frames. + */ + if (hw->fc.requested_mode == ixgbe_fc_full) { + hw->fc.current_mode = ixgbe_fc_full; + hw_dbg(hw, "Flow Control = FULL.\n"); + } else { + hw->fc.current_mode = ixgbe_fc_rx_pause; + hw_dbg(hw, "Flow Control=RX PAUSE frames only\n"); + } + } else if (!(adv_reg & adv_sym) && (adv_reg & adv_asm) && + (lp_reg & lp_sym) && (lp_reg & lp_asm)) { + hw->fc.current_mode = ixgbe_fc_tx_pause; + hw_dbg(hw, "Flow Control = TX PAUSE frames only.\n"); + } else if ((adv_reg & adv_sym) && (adv_reg & adv_asm) && + !(lp_reg & lp_sym) && (lp_reg & lp_asm)) { + hw->fc.current_mode = ixgbe_fc_rx_pause; + hw_dbg(hw, "Flow Control = RX PAUSE frames only.\n"); + } else { + hw->fc.current_mode = ixgbe_fc_none; + hw_dbg(hw, "Flow Control = NONE.\n"); + } + return 0; +} + +/** + * ixgbe_fc_autoneg_fiber - Enable flow control on 1 gig fiber + * @hw: pointer to hardware structure + * + * Enable flow control according on 1 gig fiber. + **/ +static s32 ixgbe_fc_autoneg_fiber(struct ixgbe_hw *hw) +{ + u32 pcs_anadv_reg, pcs_lpab_reg, linkstat; + s32 ret_val = IXGBE_ERR_FC_NOT_NEGOTIATED; + + /* + * On multispeed fiber at 1g, bail out if + * - link is up but AN did not complete, or if + * - link is up and AN completed but timed out + */ + + linkstat = IXGBE_READ_REG(hw, IXGBE_PCS1GLSTA); + if ((!!(linkstat & IXGBE_PCS1GLSTA_AN_COMPLETE) == 0) || + (!!(linkstat & IXGBE_PCS1GLSTA_AN_TIMED_OUT) == 1)) + goto out; + + pcs_anadv_reg = IXGBE_READ_REG(hw, IXGBE_PCS1GANA); + pcs_lpab_reg = IXGBE_READ_REG(hw, IXGBE_PCS1GANLP); + + ret_val = ixgbe_negotiate_fc(hw, pcs_anadv_reg, + pcs_lpab_reg, IXGBE_PCS1GANA_SYM_PAUSE, + IXGBE_PCS1GANA_ASM_PAUSE, + IXGBE_PCS1GANA_SYM_PAUSE, + IXGBE_PCS1GANA_ASM_PAUSE); + +out: + return ret_val; +} + +/** + * ixgbe_fc_autoneg_backplane - Enable flow control IEEE clause 37 + * @hw: pointer to hardware structure + * + * Enable flow control according to IEEE clause 37. + **/ +static s32 ixgbe_fc_autoneg_backplane(struct ixgbe_hw *hw) +{ + u32 links2, anlp1_reg, autoc_reg, links; + s32 ret_val = IXGBE_ERR_FC_NOT_NEGOTIATED; + + /* + * On backplane, bail out if + * - backplane autoneg was not completed, or if + * - we are 82599 and link partner is not AN enabled + */ + links = IXGBE_READ_REG(hw, IXGBE_LINKS); + if ((links & IXGBE_LINKS_KX_AN_COMP) == 0) + goto out; + + if (hw->mac.type == ixgbe_mac_82599EB) { + links2 = IXGBE_READ_REG(hw, IXGBE_LINKS2); + if ((links2 & IXGBE_LINKS2_AN_SUPPORTED) == 0) + goto out; + } + /* + * Read the 10g AN autoc and LP ability registers and resolve + * local flow control settings accordingly + */ + autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC); + anlp1_reg = IXGBE_READ_REG(hw, IXGBE_ANLP1); + + ret_val = ixgbe_negotiate_fc(hw, autoc_reg, + anlp1_reg, IXGBE_AUTOC_SYM_PAUSE, IXGBE_AUTOC_ASM_PAUSE, + IXGBE_ANLP1_SYM_PAUSE, IXGBE_ANLP1_ASM_PAUSE); + +out: + return ret_val; +} + +/** + * ixgbe_fc_autoneg_copper - Enable flow control IEEE clause 37 + * @hw: pointer to hardware structure + * + * Enable flow control according to IEEE clause 37. + **/ +static s32 ixgbe_fc_autoneg_copper(struct ixgbe_hw *hw) +{ + u16 technology_ability_reg = 0; + u16 lp_technology_ability_reg = 0; + + hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_ADVT, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + &technology_ability_reg); + hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_LP, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + &lp_technology_ability_reg); + + return ixgbe_negotiate_fc(hw, (u32)technology_ability_reg, + (u32)lp_technology_ability_reg, + IXGBE_TAF_SYM_PAUSE, IXGBE_TAF_ASM_PAUSE, + IXGBE_TAF_SYM_PAUSE, IXGBE_TAF_ASM_PAUSE); +} + +/** + * ixgbe_fc_autoneg - Configure flow control + * @hw: pointer to hardware structure + * + * Compares our advertised flow control capabilities to those advertised by + * our link partner, and determines the proper flow control mode to use. + **/ +void ixgbe_fc_autoneg(struct ixgbe_hw *hw) +{ + s32 ret_val = IXGBE_ERR_FC_NOT_NEGOTIATED; + ixgbe_link_speed speed; + bool link_up; + + /* + * AN should have completed when the cable was plugged in. + * Look for reasons to bail out. Bail out if: + * - FC autoneg is disabled, or if + * - link is not up. + */ + if (hw->fc.disable_fc_autoneg) + goto out; + + hw->mac.ops.check_link(hw, &speed, &link_up, false); + if (!link_up) + goto out; + + switch (hw->phy.media_type) { + /* Autoneg flow control on fiber adapters */ + case ixgbe_media_type_fiber: + if (speed == IXGBE_LINK_SPEED_1GB_FULL) + ret_val = ixgbe_fc_autoneg_fiber(hw); + break; + + /* Autoneg flow control on backplane adapters */ + case ixgbe_media_type_backplane: + ret_val = ixgbe_fc_autoneg_backplane(hw); + break; + + /* Autoneg flow control on copper adapters */ + case ixgbe_media_type_copper: + if (ixgbe_device_supports_autoneg_fc(hw) == 0) + ret_val = ixgbe_fc_autoneg_copper(hw); + break; + + default: + break; + } + +out: + if (ret_val == 0) { + hw->fc.fc_was_autonegged = true; + } else { + hw->fc.fc_was_autonegged = false; + hw->fc.current_mode = hw->fc.requested_mode; + } +} + +/** + * ixgbe_disable_pcie_master - Disable PCI-express master access + * @hw: pointer to hardware structure + * + * Disables PCI-Express master access and verifies there are no pending + * requests. IXGBE_ERR_MASTER_REQUESTS_PENDING is returned if master disable + * bit hasn't caused the master requests to be disabled, else 0 + * is returned signifying master requests disabled. + **/ +s32 ixgbe_disable_pcie_master(struct ixgbe_hw *hw) +{ + s32 status = 0; + u32 i; + + /* Always set this bit to ensure any future transactions are blocked */ + IXGBE_WRITE_REG(hw, IXGBE_CTRL, IXGBE_CTRL_GIO_DIS); + + /* Exit if master requets are blocked */ + if (!(IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_GIO)) + goto out; + + /* Poll for master request bit to clear */ + for (i = 0; i < IXGBE_PCI_MASTER_DISABLE_TIMEOUT; i++) { + udelay(100); + if (!(IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_GIO)) + goto out; + } + + /* + * Two consecutive resets are required via CTRL.RST per datasheet + * 5.2.5.3.2 Master Disable. We set a flag to inform the reset routine + * of this need. The first reset prevents new master requests from + * being issued by our device. We then must wait 1usec or more for any + * remaining completions from the PCIe bus to trickle in, and then reset + * again to clear out any effects they may have had on our device. + */ + hw_dbg(hw, "GIO Master Disable bit didn't clear - requesting resets\n"); + hw->mac.flags |= IXGBE_FLAGS_DOUBLE_RESET_REQUIRED; + + /* + * Before proceeding, make sure that the PCIe block does not have + * transactions pending. + */ + for (i = 0; i < IXGBE_PCI_MASTER_DISABLE_TIMEOUT; i++) { + udelay(100); + if (!(IXGBE_READ_PCIE_WORD(hw, IXGBE_PCI_DEVICE_STATUS) & + IXGBE_PCI_DEVICE_STATUS_TRANSACTION_PENDING)) + goto out; + } + + hw_dbg(hw, "PCIe transaction pending bit also did not clear.\n"); + status = IXGBE_ERR_MASTER_REQUESTS_PENDING; + +out: + return status; +} + +/** + * ixgbe_acquire_swfw_sync - Acquire SWFW semaphore + * @hw: pointer to hardware structure + * @mask: Mask to specify which semaphore to acquire + * + * Acquires the SWFW semaphore through the GSSR register for the specified + * function (CSR, PHY0, PHY1, EEPROM, Flash) + **/ +s32 ixgbe_acquire_swfw_sync(struct ixgbe_hw *hw, u16 mask) +{ + u32 gssr; + u32 swmask = mask; + u32 fwmask = mask << 5; + s32 timeout = 200; + + while (timeout) { + /* + * SW EEPROM semaphore bit is used for access to all + * SW_FW_SYNC/GSSR bits (not just EEPROM) + */ + if (ixgbe_get_eeprom_semaphore(hw)) + return IXGBE_ERR_SWFW_SYNC; + + gssr = IXGBE_READ_REG(hw, IXGBE_GSSR); + if (!(gssr & (fwmask | swmask))) + break; + + /* + * Firmware currently using resource (fwmask) or other software + * thread currently using resource (swmask) + */ + ixgbe_release_eeprom_semaphore(hw); + msleep(5); + timeout--; + } + + if (!timeout) { + hw_dbg(hw, "Driver can't access resource, SW_FW_SYNC timeout.\n"); + return IXGBE_ERR_SWFW_SYNC; + } + + gssr |= swmask; + IXGBE_WRITE_REG(hw, IXGBE_GSSR, gssr); + + ixgbe_release_eeprom_semaphore(hw); + return 0; +} + +/** + * ixgbe_release_swfw_sync - Release SWFW semaphore + * @hw: pointer to hardware structure + * @mask: Mask to specify which semaphore to release + * + * Releases the SWFW semaphore through the GSSR register for the specified + * function (CSR, PHY0, PHY1, EEPROM, Flash) + **/ +void ixgbe_release_swfw_sync(struct ixgbe_hw *hw, u16 mask) +{ + u32 gssr; + u32 swmask = mask; + + ixgbe_get_eeprom_semaphore(hw); + + gssr = IXGBE_READ_REG(hw, IXGBE_GSSR); + gssr &= ~swmask; + IXGBE_WRITE_REG(hw, IXGBE_GSSR, gssr); + + ixgbe_release_eeprom_semaphore(hw); +} + +/** + * ixgbe_disable_sec_rx_path_generic - Stops the receive data path + * @hw: pointer to hardware structure + * + * Stops the receive data path and waits for the HW to internally empty + * the Rx security block + **/ +s32 ixgbe_disable_sec_rx_path_generic(struct ixgbe_hw *hw) +{ +#define IXGBE_MAX_SECRX_POLL 40 + + int i; + int secrxreg; + + secrxreg = IXGBE_READ_REG(hw, IXGBE_SECRXCTRL); + secrxreg |= IXGBE_SECRXCTRL_RX_DIS; + IXGBE_WRITE_REG(hw, IXGBE_SECRXCTRL, secrxreg); + for (i = 0; i < IXGBE_MAX_SECRX_POLL; i++) { + secrxreg = IXGBE_READ_REG(hw, IXGBE_SECRXSTAT); + if (secrxreg & IXGBE_SECRXSTAT_SECRX_RDY) + break; + else + /* Use interrupt-safe sleep just in case */ + udelay(1000); + } + + /* For informational purposes only */ + if (i >= IXGBE_MAX_SECRX_POLL) + hw_dbg(hw, "Rx unit being enabled before security " + "path fully disabled. Continuing with init.\n"); + + return 0; +} + +/** + * ixgbe_enable_sec_rx_path_generic - Enables the receive data path + * @hw: pointer to hardware structure + * + * Enables the receive data path. + **/ +s32 ixgbe_enable_sec_rx_path_generic(struct ixgbe_hw *hw) +{ + int secrxreg; + + secrxreg = IXGBE_READ_REG(hw, IXGBE_SECRXCTRL); + secrxreg &= ~IXGBE_SECRXCTRL_RX_DIS; + IXGBE_WRITE_REG(hw, IXGBE_SECRXCTRL, secrxreg); + IXGBE_WRITE_FLUSH(hw); + + return 0; +} + +/** + * ixgbe_enable_rx_dma_generic - Enable the Rx DMA unit + * @hw: pointer to hardware structure + * @regval: register value to write to RXCTRL + * + * Enables the Rx DMA unit + **/ +s32 ixgbe_enable_rx_dma_generic(struct ixgbe_hw *hw, u32 regval) +{ + IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, regval); + + return 0; +} + +/** + * ixgbe_blink_led_start_generic - Blink LED based on index. + * @hw: pointer to hardware structure + * @index: led number to blink + **/ +s32 ixgbe_blink_led_start_generic(struct ixgbe_hw *hw, u32 index) +{ + ixgbe_link_speed speed = 0; + bool link_up = 0; + u32 autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC); + u32 led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL); + + /* + * Link must be up to auto-blink the LEDs; + * Force it if link is down. + */ + hw->mac.ops.check_link(hw, &speed, &link_up, false); + + if (!link_up) { + autoc_reg |= IXGBE_AUTOC_AN_RESTART; + autoc_reg |= IXGBE_AUTOC_FLU; + IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc_reg); + IXGBE_WRITE_FLUSH(hw); + msleep(10); + } + + led_reg &= ~IXGBE_LED_MODE_MASK(index); + led_reg |= IXGBE_LED_BLINK(index); + IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, led_reg); + IXGBE_WRITE_FLUSH(hw); + + return 0; +} + +/** + * ixgbe_blink_led_stop_generic - Stop blinking LED based on index. + * @hw: pointer to hardware structure + * @index: led number to stop blinking + **/ +s32 ixgbe_blink_led_stop_generic(struct ixgbe_hw *hw, u32 index) +{ + u32 autoc_reg = IXGBE_READ_REG(hw, IXGBE_AUTOC); + u32 led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL); + + autoc_reg &= ~IXGBE_AUTOC_FLU; + autoc_reg |= IXGBE_AUTOC_AN_RESTART; + IXGBE_WRITE_REG(hw, IXGBE_AUTOC, autoc_reg); + + led_reg &= ~IXGBE_LED_MODE_MASK(index); + led_reg &= ~IXGBE_LED_BLINK(index); + led_reg |= IXGBE_LED_LINK_ACTIVE << IXGBE_LED_MODE_SHIFT(index); + IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, led_reg); + IXGBE_WRITE_FLUSH(hw); + + return 0; +} + +/** + * ixgbe_get_san_mac_addr_offset - Get SAN MAC address offset from the EEPROM + * @hw: pointer to hardware structure + * @san_mac_offset: SAN MAC address offset + * + * This function will read the EEPROM location for the SAN MAC address + * pointer, and returns the value at that location. This is used in both + * get and set mac_addr routines. + **/ +static s32 ixgbe_get_san_mac_addr_offset(struct ixgbe_hw *hw, + u16 *san_mac_offset) +{ + /* + * First read the EEPROM pointer to see if the MAC addresses are + * available. + */ + hw->eeprom.ops.read(hw, IXGBE_SAN_MAC_ADDR_PTR, san_mac_offset); + + return 0; +} + +/** + * ixgbe_get_san_mac_addr_generic - SAN MAC address retrieval from the EEPROM + * @hw: pointer to hardware structure + * @san_mac_addr: SAN MAC address + * + * Reads the SAN MAC address from the EEPROM, if it's available. This is + * per-port, so set_lan_id() must be called before reading the addresses. + * set_lan_id() is called by identify_sfp(), but this cannot be relied + * upon for non-SFP connections, so we must call it here. + **/ +s32 ixgbe_get_san_mac_addr_generic(struct ixgbe_hw *hw, u8 *san_mac_addr) +{ + u16 san_mac_data, san_mac_offset; + u8 i; + + /* + * First read the EEPROM pointer to see if the MAC addresses are + * available. If they're not, no point in calling set_lan_id() here. + */ + ixgbe_get_san_mac_addr_offset(hw, &san_mac_offset); + + if ((san_mac_offset == 0) || (san_mac_offset == 0xFFFF)) { + /* + * No addresses available in this EEPROM. It's not an + * error though, so just wipe the local address and return. + */ + for (i = 0; i < 6; i++) + san_mac_addr[i] = 0xFF; + + goto san_mac_addr_out; + } + + /* make sure we know which port we need to program */ + hw->mac.ops.set_lan_id(hw); + /* apply the port offset to the address offset */ + (hw->bus.func) ? (san_mac_offset += IXGBE_SAN_MAC_ADDR_PORT1_OFFSET) : + (san_mac_offset += IXGBE_SAN_MAC_ADDR_PORT0_OFFSET); + for (i = 0; i < 3; i++) { + hw->eeprom.ops.read(hw, san_mac_offset, &san_mac_data); + san_mac_addr[i * 2] = (u8)(san_mac_data); + san_mac_addr[i * 2 + 1] = (u8)(san_mac_data >> 8); + san_mac_offset++; + } + +san_mac_addr_out: + return 0; +} + +/** + * ixgbe_set_san_mac_addr_generic - Write the SAN MAC address to the EEPROM + * @hw: pointer to hardware structure + * @san_mac_addr: SAN MAC address + * + * Write a SAN MAC address to the EEPROM. + **/ +s32 ixgbe_set_san_mac_addr_generic(struct ixgbe_hw *hw, u8 *san_mac_addr) +{ + s32 status = 0; + u16 san_mac_data, san_mac_offset; + u8 i; + + /* Look for SAN mac address pointer. If not defined, return */ + ixgbe_get_san_mac_addr_offset(hw, &san_mac_offset); + + if ((san_mac_offset == 0) || (san_mac_offset == 0xFFFF)) { + status = IXGBE_ERR_NO_SAN_ADDR_PTR; + goto san_mac_addr_out; + } + + /* Make sure we know which port we need to write */ + hw->mac.ops.set_lan_id(hw); + /* Apply the port offset to the address offset */ + (hw->bus.func) ? (san_mac_offset += IXGBE_SAN_MAC_ADDR_PORT1_OFFSET) : + (san_mac_offset += IXGBE_SAN_MAC_ADDR_PORT0_OFFSET); + + for (i = 0; i < 3; i++) { + san_mac_data = (u16)((u16)(san_mac_addr[i * 2 + 1]) << 8); + san_mac_data |= (u16)(san_mac_addr[i * 2]); + hw->eeprom.ops.write(hw, san_mac_offset, san_mac_data); + san_mac_offset++; + } + +san_mac_addr_out: + return status; +} + +/** + * ixgbe_get_pcie_msix_count_generic - Gets MSI-X vector count + * @hw: pointer to hardware structure + * + * Read PCIe configuration space, and get the MSI-X vector count from + * the capabilities table. + **/ +u16 ixgbe_get_pcie_msix_count_generic(struct ixgbe_hw *hw) +{ + u16 msix_count = 1; + u16 max_msix_count; + u16 pcie_offset; + + switch (hw->mac.type) { + case ixgbe_mac_82598EB: + pcie_offset = IXGBE_PCIE_MSIX_82598_CAPS; + max_msix_count = IXGBE_MAX_MSIX_VECTORS_82598; + break; + case ixgbe_mac_82599EB: + case ixgbe_mac_X540: + pcie_offset = IXGBE_PCIE_MSIX_82599_CAPS; + max_msix_count = IXGBE_MAX_MSIX_VECTORS_82599; + break; + default: + return msix_count; + } + + msix_count = IXGBE_READ_PCIE_WORD(hw, pcie_offset); + msix_count &= IXGBE_PCIE_MSIX_TBL_SZ_MASK; + + /* MSI-X count is zero-based in HW */ + msix_count++; + + if (msix_count > max_msix_count) + msix_count = max_msix_count; + + return msix_count; +} + +/** + * ixgbe_insert_mac_addr_generic - Find a RAR for this mac address + * @hw: pointer to hardware structure + * @addr: Address to put into receive address register + * @vmdq: VMDq pool to assign + * + * Puts an ethernet address into a receive address register, or + * finds the rar that it is aleady in; adds to the pool list + **/ +s32 ixgbe_insert_mac_addr_generic(struct ixgbe_hw *hw, u8 *addr, u32 vmdq) +{ + static const u32 NO_EMPTY_RAR_FOUND = 0xFFFFFFFF; + u32 first_empty_rar = NO_EMPTY_RAR_FOUND; + u32 rar; + u32 rar_low, rar_high; + u32 addr_low, addr_high; + + /* swap bytes for HW little endian */ + addr_low = addr[0] | (addr[1] << 8) + | (addr[2] << 16) + | (addr[3] << 24); + addr_high = addr[4] | (addr[5] << 8); + + /* + * Either find the mac_id in rar or find the first empty space. + * rar_highwater points to just after the highest currently used + * rar in order to shorten the search. It grows when we add a new + * rar to the top. + */ + for (rar = 0; rar < hw->mac.rar_highwater; rar++) { + rar_high = IXGBE_READ_REG(hw, IXGBE_RAH(rar)); + + if (((IXGBE_RAH_AV & rar_high) == 0) + && first_empty_rar == NO_EMPTY_RAR_FOUND) { + first_empty_rar = rar; + } else if ((rar_high & 0xFFFF) == addr_high) { + rar_low = IXGBE_READ_REG(hw, IXGBE_RAL(rar)); + if (rar_low == addr_low) + break; /* found it already in the rars */ + } + } + + if (rar < hw->mac.rar_highwater) { + /* already there so just add to the pool bits */ + ixgbe_set_vmdq(hw, rar, vmdq); + } else if (first_empty_rar != NO_EMPTY_RAR_FOUND) { + /* stick it into first empty RAR slot we found */ + rar = first_empty_rar; + ixgbe_set_rar(hw, rar, addr, vmdq, IXGBE_RAH_AV); + } else if (rar == hw->mac.rar_highwater) { + /* add it to the top of the list and inc the highwater mark */ + ixgbe_set_rar(hw, rar, addr, vmdq, IXGBE_RAH_AV); + hw->mac.rar_highwater++; + } else if (rar >= hw->mac.num_rar_entries) { + return IXGBE_ERR_INVALID_MAC_ADDR; + } + + /* + * If we found rar[0], make sure the default pool bit (we use pool 0) + * remains cleared to be sure default pool packets will get delivered + */ + if (rar == 0) + ixgbe_clear_vmdq(hw, rar, 0); + + return rar; +} + +/** + * ixgbe_clear_vmdq_generic - Disassociate a VMDq pool index from a rx address + * @hw: pointer to hardware struct + * @rar: receive address register index to disassociate + * @vmdq: VMDq pool index to remove from the rar + **/ +s32 ixgbe_clear_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq) +{ + u32 mpsar_lo, mpsar_hi; + u32 rar_entries = hw->mac.num_rar_entries; + + /* Make sure we are using a valid rar index range */ + if (rar >= rar_entries) { + hw_dbg(hw, "RAR index %d is out of range.\n", rar); + return IXGBE_ERR_INVALID_ARGUMENT; + } + + mpsar_lo = IXGBE_READ_REG(hw, IXGBE_MPSAR_LO(rar)); + mpsar_hi = IXGBE_READ_REG(hw, IXGBE_MPSAR_HI(rar)); + + if (!mpsar_lo && !mpsar_hi) + goto done; + + if (vmdq == IXGBE_CLEAR_VMDQ_ALL) { + if (mpsar_lo) { + IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(rar), 0); + mpsar_lo = 0; + } + if (mpsar_hi) { + IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(rar), 0); + mpsar_hi = 0; + } + } else if (vmdq < 32) { + mpsar_lo &= ~(1 << vmdq); + IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(rar), mpsar_lo); + } else { + mpsar_hi &= ~(1 << (vmdq - 32)); + IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(rar), mpsar_hi); + } + + /* was that the last pool using this rar? */ + if (mpsar_lo == 0 && mpsar_hi == 0 && rar != 0) + hw->mac.ops.clear_rar(hw, rar); +done: + return 0; +} + +/** + * ixgbe_set_vmdq_generic - Associate a VMDq pool index with a rx address + * @hw: pointer to hardware struct + * @rar: receive address register index to associate with a VMDq index + * @vmdq: VMDq pool index + **/ +s32 ixgbe_set_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq) +{ + u32 mpsar; + u32 rar_entries = hw->mac.num_rar_entries; + + /* Make sure we are using a valid rar index range */ + if (rar >= rar_entries) { + hw_dbg(hw, "RAR index %d is out of range.\n", rar); + return IXGBE_ERR_INVALID_ARGUMENT; + } + + if (vmdq < 32) { + mpsar = IXGBE_READ_REG(hw, IXGBE_MPSAR_LO(rar)); + mpsar |= 1 << vmdq; + IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(rar), mpsar); + } else { + mpsar = IXGBE_READ_REG(hw, IXGBE_MPSAR_HI(rar)); + mpsar |= 1 << (vmdq - 32); + IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(rar), mpsar); + } + return 0; +} + +/** + * This function should only be involved in the IOV mode. + * In IOV mode, Default pool is next pool after the number of + * VFs advertized and not 0. + * MPSAR table needs to be updated for SAN_MAC RAR [hw->mac.san_mac_rar_index] + * + * ixgbe_set_vmdq_san_mac - Associate default VMDq pool index with a rx address + * @hw: pointer to hardware struct + * @vmdq: VMDq pool index + **/ +s32 ixgbe_set_vmdq_san_mac_generic(struct ixgbe_hw *hw, u32 vmdq) +{ + u32 mpsar; + u32 rar = hw->mac.san_mac_rar_index; + + if (vmdq < 32) { + mpsar = IXGBE_READ_REG(hw, IXGBE_MPSAR_LO(rar)); + mpsar |= 1 << vmdq; + IXGBE_WRITE_REG(hw, IXGBE_MPSAR_LO(rar), mpsar); + } else { + mpsar = IXGBE_READ_REG(hw, IXGBE_MPSAR_HI(rar)); + mpsar |= 1 << (vmdq - 32); + IXGBE_WRITE_REG(hw, IXGBE_MPSAR_HI(rar), mpsar); + } + + return 0; +} + +/** + * ixgbe_init_uta_tables_generic - Initialize the Unicast Table Array + * @hw: pointer to hardware structure + **/ +s32 ixgbe_init_uta_tables_generic(struct ixgbe_hw *hw) +{ + int i; + + hw_dbg(hw, " Clearing UTA\n"); + + for (i = 0; i < 128; i++) + IXGBE_WRITE_REG(hw, IXGBE_UTA(i), 0); + + return 0; +} + +/** + * ixgbe_find_vlvf_slot - find the vlanid or the first empty slot + * @hw: pointer to hardware structure + * @vlan: VLAN id to write to VLAN filter + * + * return the VLVF index where this VLAN id should be placed + * + **/ +s32 ixgbe_find_vlvf_slot(struct ixgbe_hw *hw, u32 vlan) +{ + u32 bits = 0; + u32 first_empty_slot = 0; + s32 regindex; + + /* short cut the special case */ + if (vlan == 0) + return 0; + + /* + * Search for the vlan id in the VLVF entries. Save off the first empty + * slot found along the way + */ + for (regindex = 1; regindex < IXGBE_VLVF_ENTRIES; regindex++) { + bits = IXGBE_READ_REG(hw, IXGBE_VLVF(regindex)); + if (!bits && !(first_empty_slot)) + first_empty_slot = regindex; + else if ((bits & 0x0FFF) == vlan) + break; + } + + /* + * If regindex is less than IXGBE_VLVF_ENTRIES, then we found the vlan + * in the VLVF. Else use the first empty VLVF register for this + * vlan id. + */ + if (regindex >= IXGBE_VLVF_ENTRIES) { + if (first_empty_slot) + regindex = first_empty_slot; + else { + hw_dbg(hw, "No space in VLVF.\n"); + regindex = IXGBE_ERR_NO_SPACE; + } + } + + return regindex; +} + +/** + * ixgbe_set_vfta_generic - Set VLAN filter table + * @hw: pointer to hardware structure + * @vlan: VLAN id to write to VLAN filter + * @vind: VMDq output index that maps queue to VLAN id in VFVFB + * @vlan_on: boolean flag to turn on/off VLAN in VFVF + * + * Turn on/off specified VLAN in the VLAN filter table. + **/ +s32 ixgbe_set_vfta_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind, + bool vlan_on) +{ + s32 regindex; + u32 bitindex; + u32 vfta; + u32 targetbit; + s32 ret_val = 0; + bool vfta_changed = false; + + if (vlan > 4095) + return IXGBE_ERR_PARAM; + + /* + * this is a 2 part operation - first the VFTA, then the + * VLVF and VLVFB if VT Mode is set + * We don't write the VFTA until we know the VLVF part succeeded. + */ + + /* Part 1 + * The VFTA is a bitstring made up of 128 32-bit registers + * that enable the particular VLAN id, much like the MTA: + * bits[11-5]: which register + * bits[4-0]: which bit in the register + */ + regindex = (vlan >> 5) & 0x7F; + bitindex = vlan & 0x1F; + targetbit = (1 << bitindex); + vfta = IXGBE_READ_REG(hw, IXGBE_VFTA(regindex)); + + if (vlan_on) { + if (!(vfta & targetbit)) { + vfta |= targetbit; + vfta_changed = true; + } + } else { + if ((vfta & targetbit)) { + vfta &= ~targetbit; + vfta_changed = true; + } + } + + /* Part 2 + * Call ixgbe_set_vlvf_generic to set VLVFB and VLVF + */ + ret_val = ixgbe_set_vlvf_generic(hw, vlan, vind, vlan_on, + &vfta_changed); + if (ret_val != 0) + return ret_val; + + if (vfta_changed) + IXGBE_WRITE_REG(hw, IXGBE_VFTA(regindex), vfta); + + return 0; +} + +/** + * ixgbe_set_vlvf_generic - Set VLAN Pool Filter + * @hw: pointer to hardware structure + * @vlan: VLAN id to write to VLAN filter + * @vind: VMDq output index that maps queue to VLAN id in VFVFB + * @vlan_on: boolean flag to turn on/off VLAN in VFVF + * @vfta_changed: pointer to boolean flag which indicates whether VFTA + * should be changed + * + * Turn on/off specified bit in VLVF table. + **/ +s32 ixgbe_set_vlvf_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind, + bool vlan_on, bool *vfta_changed) +{ + u32 vt; + + if (vlan > 4095) + return IXGBE_ERR_PARAM; + + /* If VT Mode is set + * Either vlan_on + * make sure the vlan is in VLVF + * set the vind bit in the matching VLVFB + * Or !vlan_on + * clear the pool bit and possibly the vind + */ + vt = IXGBE_READ_REG(hw, IXGBE_VT_CTL); + if (vt & IXGBE_VT_CTL_VT_ENABLE) { + s32 vlvf_index; + u32 bits; + + vlvf_index = ixgbe_find_vlvf_slot(hw, vlan); + if (vlvf_index < 0) + return vlvf_index; + + if (vlan_on) { + /* set the pool bit */ + if (vind < 32) { + bits = IXGBE_READ_REG(hw, + IXGBE_VLVFB(vlvf_index * 2)); + bits |= (1 << vind); + IXGBE_WRITE_REG(hw, + IXGBE_VLVFB(vlvf_index * 2), + bits); + } else { + bits = IXGBE_READ_REG(hw, + IXGBE_VLVFB((vlvf_index * 2) + 1)); + bits |= (1 << (vind - 32)); + IXGBE_WRITE_REG(hw, + IXGBE_VLVFB((vlvf_index * 2) + 1), + bits); + } + } else { + /* clear the pool bit */ + if (vind < 32) { + bits = IXGBE_READ_REG(hw, + IXGBE_VLVFB(vlvf_index * 2)); + bits &= ~(1 << vind); + IXGBE_WRITE_REG(hw, + IXGBE_VLVFB(vlvf_index * 2), + bits); + bits |= IXGBE_READ_REG(hw, + IXGBE_VLVFB((vlvf_index * 2) + 1)); + } else { + bits = IXGBE_READ_REG(hw, + IXGBE_VLVFB((vlvf_index * 2) + 1)); + bits &= ~(1 << (vind - 32)); + IXGBE_WRITE_REG(hw, + IXGBE_VLVFB((vlvf_index * 2) + 1), + bits); + bits |= IXGBE_READ_REG(hw, + IXGBE_VLVFB(vlvf_index * 2)); + } + } + + /* + * If there are still bits set in the VLVFB registers + * for the VLAN ID indicated we need to see if the + * caller is requesting that we clear the VFTA entry bit. + * If the caller has requested that we clear the VFTA + * entry bit but there are still pools/VFs using this VLAN + * ID entry then ignore the request. We're not worried + * about the case where we're turning the VFTA VLAN ID + * entry bit on, only when requested to turn it off as + * there may be multiple pools and/or VFs using the + * VLAN ID entry. In that case we cannot clear the + * VFTA bit until all pools/VFs using that VLAN ID have also + * been cleared. This will be indicated by "bits" being + * zero. + */ + if (bits) { + IXGBE_WRITE_REG(hw, IXGBE_VLVF(vlvf_index), + (IXGBE_VLVF_VIEN | vlan)); + if ((!vlan_on) && (vfta_changed != NULL)) { + /* someone wants to clear the vfta entry + * but some pools/VFs are still using it. + * Ignore it. */ + *vfta_changed = false; + } + } else + IXGBE_WRITE_REG(hw, IXGBE_VLVF(vlvf_index), 0); + } + + return 0; +} + +/** + * ixgbe_clear_vfta_generic - Clear VLAN filter table + * @hw: pointer to hardware structure + * + * Clears the VLAN filer table, and the VMDq index associated with the filter + **/ +s32 ixgbe_clear_vfta_generic(struct ixgbe_hw *hw) +{ + u32 offset; + + for (offset = 0; offset < hw->mac.vft_size; offset++) + IXGBE_WRITE_REG(hw, IXGBE_VFTA(offset), 0); + + for (offset = 0; offset < IXGBE_VLVF_ENTRIES; offset++) { + IXGBE_WRITE_REG(hw, IXGBE_VLVF(offset), 0); + IXGBE_WRITE_REG(hw, IXGBE_VLVFB(offset * 2), 0); + IXGBE_WRITE_REG(hw, IXGBE_VLVFB((offset * 2) + 1), 0); + } + + return 0; +} + +/** + * ixgbe_check_mac_link_generic - Determine link and speed status + * @hw: pointer to hardware structure + * @speed: pointer to link speed + * @link_up: true when link is up + * @link_up_wait_to_complete: bool used to wait for link up or not + * + * Reads the links register to determine if link is up and the current speed + **/ +s32 ixgbe_check_mac_link_generic(struct ixgbe_hw *hw, ixgbe_link_speed *speed, + bool *link_up, bool link_up_wait_to_complete) +{ + u32 links_reg, links_orig; + u32 i; + + /* clear the old state */ + links_orig = IXGBE_READ_REG(hw, IXGBE_LINKS); + + links_reg = IXGBE_READ_REG(hw, IXGBE_LINKS); + + if (links_orig != links_reg) { + hw_dbg(hw, "LINKS changed from %08X to %08X\n", + links_orig, links_reg); + } + + if (link_up_wait_to_complete) { + for (i = 0; i < IXGBE_LINK_UP_TIME; i++) { + if (links_reg & IXGBE_LINKS_UP) { + *link_up = true; + break; + } else { + *link_up = false; + } + msleep(100); + links_reg = IXGBE_READ_REG(hw, IXGBE_LINKS); + } + } else { + if (links_reg & IXGBE_LINKS_UP) + *link_up = true; + else + *link_up = false; + } + + if ((links_reg & IXGBE_LINKS_SPEED_82599) == + IXGBE_LINKS_SPEED_10G_82599) + *speed = IXGBE_LINK_SPEED_10GB_FULL; + else if ((links_reg & IXGBE_LINKS_SPEED_82599) == + IXGBE_LINKS_SPEED_1G_82599) + *speed = IXGBE_LINK_SPEED_1GB_FULL; + else if ((links_reg & IXGBE_LINKS_SPEED_82599) == + IXGBE_LINKS_SPEED_100_82599) + *speed = IXGBE_LINK_SPEED_100_FULL; + else + *speed = IXGBE_LINK_SPEED_UNKNOWN; + + return 0; +} + +/** + * ixgbe_get_wwn_prefix_generic - Get alternative WWNN/WWPN prefix from + * the EEPROM + * @hw: pointer to hardware structure + * @wwnn_prefix: the alternative WWNN prefix + * @wwpn_prefix: the alternative WWPN prefix + * + * This function will read the EEPROM from the alternative SAN MAC address + * block to check the support for the alternative WWNN/WWPN prefix support. + **/ +s32 ixgbe_get_wwn_prefix_generic(struct ixgbe_hw *hw, u16 *wwnn_prefix, + u16 *wwpn_prefix) +{ + u16 offset, caps; + u16 alt_san_mac_blk_offset; + + /* clear output first */ + *wwnn_prefix = 0xFFFF; + *wwpn_prefix = 0xFFFF; + + /* check if alternative SAN MAC is supported */ + hw->eeprom.ops.read(hw, IXGBE_ALT_SAN_MAC_ADDR_BLK_PTR, + &alt_san_mac_blk_offset); + + if ((alt_san_mac_blk_offset == 0) || + (alt_san_mac_blk_offset == 0xFFFF)) + goto wwn_prefix_out; + + /* check capability in alternative san mac address block */ + offset = alt_san_mac_blk_offset + IXGBE_ALT_SAN_MAC_ADDR_CAPS_OFFSET; + hw->eeprom.ops.read(hw, offset, &caps); + if (!(caps & IXGBE_ALT_SAN_MAC_ADDR_CAPS_ALTWWN)) + goto wwn_prefix_out; + + /* get the corresponding prefix for WWNN/WWPN */ + offset = alt_san_mac_blk_offset + IXGBE_ALT_SAN_MAC_ADDR_WWNN_OFFSET; + hw->eeprom.ops.read(hw, offset, wwnn_prefix); + + offset = alt_san_mac_blk_offset + IXGBE_ALT_SAN_MAC_ADDR_WWPN_OFFSET; + hw->eeprom.ops.read(hw, offset, wwpn_prefix); + +wwn_prefix_out: + return 0; +} + +/** + * ixgbe_get_fcoe_boot_status_generic - Get FCOE boot status from EEPROM + * @hw: pointer to hardware structure + * @bs: the fcoe boot status + * + * This function will read the FCOE boot status from the iSCSI FCOE block + **/ +s32 ixgbe_get_fcoe_boot_status_generic(struct ixgbe_hw *hw, u16 *bs) +{ + u16 offset, caps, flags; + s32 status; + + /* clear output first */ + *bs = ixgbe_fcoe_bootstatus_unavailable; + + /* check if FCOE IBA block is present */ + offset = IXGBE_FCOE_IBA_CAPS_BLK_PTR; + status = hw->eeprom.ops.read(hw, offset, &caps); + if (status != 0) + goto out; + + if (!(caps & IXGBE_FCOE_IBA_CAPS_FCOE)) + goto out; + + /* check if iSCSI FCOE block is populated */ + status = hw->eeprom.ops.read(hw, IXGBE_ISCSI_FCOE_BLK_PTR, &offset); + if (status != 0) + goto out; + + if ((offset == 0) || (offset == 0xFFFF)) + goto out; + + /* read fcoe flags in iSCSI FCOE block */ + offset = offset + IXGBE_ISCSI_FCOE_FLAGS_OFFSET; + status = hw->eeprom.ops.read(hw, offset, &flags); + if (status != 0) + goto out; + + if (flags & IXGBE_ISCSI_FCOE_FLAGS_ENABLE) + *bs = ixgbe_fcoe_bootstatus_enabled; + else + *bs = ixgbe_fcoe_bootstatus_disabled; + +out: + return status; +} + +/** + * ixgbe_set_mac_anti_spoofing - Enable/Disable MAC anti-spoofing + * @hw: pointer to hardware structure + * @enable: enable or disable switch for anti-spoofing + * @pf: Physical Function pool - do not enable anti-spoofing for the PF + * + **/ +void ixgbe_set_mac_anti_spoofing(struct ixgbe_hw *hw, bool enable, int pf) +{ + int j; + int pf_target_reg = pf >> 3; + int pf_target_shift = pf % 8; + u32 pfvfspoof = 0; + + if (hw->mac.type == ixgbe_mac_82598EB) + return; + + if (enable) + pfvfspoof = IXGBE_SPOOF_MACAS_MASK; + + /* + * PFVFSPOOF register array is size 8 with 8 bits assigned to + * MAC anti-spoof enables in each register array element. + */ + for (j = 0; j < IXGBE_PFVFSPOOF_REG_COUNT; j++) + IXGBE_WRITE_REG(hw, IXGBE_PFVFSPOOF(j), pfvfspoof); + + /* If not enabling anti-spoofing then done */ + if (!enable) + return; + + /* + * The PF should be allowed to spoof so that it can support + * emulation mode NICs. Reset the bit assigned to the PF + */ + pfvfspoof = IXGBE_READ_REG(hw, IXGBE_PFVFSPOOF(pf_target_reg)); + pfvfspoof ^= (1 << pf_target_shift); + IXGBE_WRITE_REG(hw, IXGBE_PFVFSPOOF(pf_target_reg), pfvfspoof); +} + +/** + * ixgbe_set_vlan_anti_spoofing - Enable/Disable VLAN anti-spoofing + * @hw: pointer to hardware structure + * @enable: enable or disable switch for VLAN anti-spoofing + * @pf: Virtual Function pool - VF Pool to set for VLAN anti-spoofing + * + **/ +void ixgbe_set_vlan_anti_spoofing(struct ixgbe_hw *hw, bool enable, int vf) +{ + int vf_target_reg = vf >> 3; + int vf_target_shift = vf % 8 + IXGBE_SPOOF_VLANAS_SHIFT; + u32 pfvfspoof; + + if (hw->mac.type == ixgbe_mac_82598EB) + return; + + pfvfspoof = IXGBE_READ_REG(hw, IXGBE_PFVFSPOOF(vf_target_reg)); + if (enable) + pfvfspoof |= (1 << vf_target_shift); + else + pfvfspoof &= ~(1 << vf_target_shift); + IXGBE_WRITE_REG(hw, IXGBE_PFVFSPOOF(vf_target_reg), pfvfspoof); +} + +/** + * ixgbe_get_device_caps_generic - Get additional device capabilities + * @hw: pointer to hardware structure + * @device_caps: the EEPROM word with the extra device capabilities + * + * This function will read the EEPROM location for the device capabilities, + * and return the word through device_caps. + **/ +s32 ixgbe_get_device_caps_generic(struct ixgbe_hw *hw, u16 *device_caps) +{ + hw->eeprom.ops.read(hw, IXGBE_DEVICE_CAPS, device_caps); + + return 0; +} + +/** + * ixgbe_calculate_checksum - Calculate checksum for buffer + * @buffer: pointer to EEPROM + * @length: size of EEPROM to calculate a checksum for + * Calculates the checksum for some buffer on a specified length. The + * checksum calculated is returned. + **/ +static u8 ixgbe_calculate_checksum(u8 *buffer, u32 length) +{ + u32 i; + u8 sum = 0; + + if (!buffer) + return 0; + for (i = 0; i < length; i++) + sum += buffer[i]; + + return (u8) (0 - sum); +} + +/** + * ixgbe_host_interface_command - Issue command to manageability block + * @hw: pointer to the HW structure + * @buffer: contains the command to write and where the return status will + * be placed + * @length: length of buffer, must be multiple of 4 bytes + * + * Communicates with the manageability block. On success return 0 + * else return IXGBE_ERR_HOST_INTERFACE_COMMAND. + **/ +static s32 ixgbe_host_interface_command(struct ixgbe_hw *hw, u32 *buffer, + u32 length) +{ + u32 hicr, i, bi; + u32 hdr_size = sizeof(struct ixgbe_hic_hdr); + u8 buf_len, dword_len; + + s32 ret_val = 0; + + if (length == 0 || length & 0x3 || + length > IXGBE_HI_MAX_BLOCK_BYTE_LENGTH) { + hw_dbg(hw, "Buffer length failure.\n"); + ret_val = IXGBE_ERR_HOST_INTERFACE_COMMAND; + goto out; + } + + /* Check that the host interface is enabled. */ + hicr = IXGBE_READ_REG(hw, IXGBE_HICR); + if ((hicr & IXGBE_HICR_EN) == 0) { + hw_dbg(hw, "IXGBE_HOST_EN bit disabled.\n"); + ret_val = IXGBE_ERR_HOST_INTERFACE_COMMAND; + goto out; + } + + /* Calculate length in DWORDs */ + dword_len = length >> 2; + + /* + * The device driver writes the relevant command block + * into the ram area. + */ + for (i = 0; i < dword_len; i++) + IXGBE_WRITE_REG_ARRAY(hw, IXGBE_FLEX_MNG, + i, IXGBE_CPU_TO_LE32(buffer[i])); + + /* Setting this bit tells the ARC that a new command is pending. */ + IXGBE_WRITE_REG(hw, IXGBE_HICR, hicr | IXGBE_HICR_C); + + for (i = 0; i < IXGBE_HI_COMMAND_TIMEOUT; i++) { + hicr = IXGBE_READ_REG(hw, IXGBE_HICR); + if (!(hicr & IXGBE_HICR_C)) + break; + msleep(1); + } + + /* Check command successful completion. */ + if (i == IXGBE_HI_COMMAND_TIMEOUT || + (!(IXGBE_READ_REG(hw, IXGBE_HICR) & IXGBE_HICR_SV))) { + hw_dbg(hw, "Command has failed with no status valid.\n"); + ret_val = IXGBE_ERR_HOST_INTERFACE_COMMAND; + goto out; + } + + /* Calculate length in DWORDs */ + dword_len = hdr_size >> 2; + + /* first pull in the header so we know the buffer length */ + for (bi = 0; bi < dword_len; bi++) { + buffer[bi] = IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG, bi); + IXGBE_LE32_TO_CPUS(&buffer[bi]); + } + + /* If there is any thing in data position pull it in */ + buf_len = ((struct ixgbe_hic_hdr *)buffer)->buf_len; + if (buf_len == 0) + goto out; + + if (length < (buf_len + hdr_size)) { + hw_dbg(hw, "Buffer not large enough for reply message.\n"); + ret_val = IXGBE_ERR_HOST_INTERFACE_COMMAND; + goto out; + } + + /* Calculate length in DWORDs, add 3 for odd lengths */ + dword_len = (buf_len + 3) >> 2; + + /* Pull in the rest of the buffer (bi is where we left off)*/ + for (; bi <= dword_len; bi++) { + buffer[bi] = IXGBE_READ_REG_ARRAY(hw, IXGBE_FLEX_MNG, bi); + IXGBE_LE32_TO_CPUS(&buffer[bi]); + } + +out: + return ret_val; +} + +/** + * ixgbe_set_fw_drv_ver_generic - Sends driver version to firmware + * @hw: pointer to the HW structure + * @maj: driver version major number + * @min: driver version minor number + * @build: driver version build number + * @sub: driver version sub build number + * + * Sends driver version number to firmware through the manageability + * block. On success return 0 + * else returns IXGBE_ERR_SWFW_SYNC when encountering an error acquiring + * semaphore or IXGBE_ERR_HOST_INTERFACE_COMMAND when command fails. + **/ +s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min, + u8 build, u8 sub) +{ + struct ixgbe_hic_drv_info fw_cmd; + int i; + s32 ret_val = 0; + + if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_SW_MNG_SM) + != 0) { + ret_val = IXGBE_ERR_SWFW_SYNC; + goto out; + } + + fw_cmd.hdr.cmd = FW_CEM_CMD_DRIVER_INFO; + fw_cmd.hdr.buf_len = FW_CEM_CMD_DRIVER_INFO_LEN; + fw_cmd.hdr.cmd_or_resp.cmd_resv = FW_CEM_CMD_RESERVED; + fw_cmd.port_num = (u8)hw->bus.func; + fw_cmd.ver_maj = maj; + fw_cmd.ver_min = min; + fw_cmd.ver_build = build; + fw_cmd.ver_sub = sub; + fw_cmd.hdr.checksum = 0; + fw_cmd.hdr.checksum = ixgbe_calculate_checksum((u8 *)&fw_cmd, + (FW_CEM_HDR_LEN + fw_cmd.hdr.buf_len)); + fw_cmd.pad = 0; + fw_cmd.pad2 = 0; + + for (i = 0; i <= FW_CEM_MAX_RETRIES; i++) { + ret_val = ixgbe_host_interface_command(hw, (u32 *)&fw_cmd, + sizeof(fw_cmd)); + if (ret_val != 0) + continue; + + if (fw_cmd.hdr.cmd_or_resp.ret_status == + FW_CEM_RESP_STATUS_SUCCESS) + ret_val = 0; + else + ret_val = IXGBE_ERR_HOST_INTERFACE_COMMAND; + + break; + } + + hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_SW_MNG_SM); +out: + return ret_val; +} + +/** + * ixgbe_set_rxpba_generic - Initialize Rx packet buffer + * @hw: pointer to hardware structure + * @num_pb: number of packet buffers to allocate + * @headroom: reserve n KB of headroom + * @strategy: packet buffer allocation strategy + **/ +void ixgbe_set_rxpba_generic(struct ixgbe_hw *hw, int num_pb, u32 headroom, + int strategy) +{ + u32 pbsize = hw->mac.rx_pb_size; + int i = 0; + u32 rxpktsize, txpktsize, txpbthresh; + + /* Reserve headroom */ + pbsize -= headroom; + + if (!num_pb) + num_pb = 1; + + /* Divide remaining packet buffer space amongst the number of packet + * buffers requested using supplied strategy. + */ + switch (strategy) { + case PBA_STRATEGY_WEIGHTED: + /* ixgbe_dcb_pba_80_48 strategy weight first half of packet + * buffer with 5/8 of the packet buffer space. + */ + rxpktsize = (pbsize * 5) / (num_pb * 4); + pbsize -= rxpktsize * (num_pb / 2); + rxpktsize <<= IXGBE_RXPBSIZE_SHIFT; + for (; i < (num_pb / 2); i++) + IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpktsize); + /* Fall through to configure remaining packet buffers */ + case PBA_STRATEGY_EQUAL: + rxpktsize = (pbsize / (num_pb - i)) << IXGBE_RXPBSIZE_SHIFT; + for (; i < num_pb; i++) + IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), rxpktsize); + break; + default: + break; + } + + /* Only support an equally distributed Tx packet buffer strategy. */ + txpktsize = IXGBE_TXPBSIZE_MAX / num_pb; + txpbthresh = (txpktsize / 1024) - IXGBE_TXPKT_SIZE_MAX; + for (i = 0; i < num_pb; i++) { + IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), txpktsize); + IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), txpbthresh); + } + + /* Clear unused TCs, if any, to zero buffer size*/ + for (; i < IXGBE_MAX_PB; i++) { + IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(i), 0); + IXGBE_WRITE_REG(hw, IXGBE_TXPBSIZE(i), 0); + IXGBE_WRITE_REG(hw, IXGBE_TXPBTHRESH(i), 0); + } +} + +/** + * ixgbe_clear_tx_pending - Clear pending TX work from the PCIe fifo + * @hw: pointer to the hardware structure + * + * The 82599 and x540 MACs can experience issues if TX work is still pending + * when a reset occurs. This function prevents this by flushing the PCIe + * buffers on the system. + **/ +void ixgbe_clear_tx_pending(struct ixgbe_hw *hw) +{ + u32 gcr_ext, hlreg0; + + /* + * If double reset is not requested then all transactions should + * already be clear and as such there is no work to do + */ + if (!(hw->mac.flags & IXGBE_FLAGS_DOUBLE_RESET_REQUIRED)) + return; + + /* + * Set loopback enable to prevent any transmits from being sent + * should the link come up. This assumes that the RXCTRL.RXEN bit + * has already been cleared. + */ + hlreg0 = IXGBE_READ_REG(hw, IXGBE_HLREG0); + IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0 | IXGBE_HLREG0_LPBK); + + /* initiate cleaning flow for buffers in the PCIe transaction layer */ + gcr_ext = IXGBE_READ_REG(hw, IXGBE_GCR_EXT); + IXGBE_WRITE_REG(hw, IXGBE_GCR_EXT, + gcr_ext | IXGBE_GCR_EXT_BUFFERS_CLEAR); + + /* Flush all writes and allow 20usec for all transactions to clear */ + IXGBE_WRITE_FLUSH(hw); + udelay(20); + + /* restore previous register values */ + IXGBE_WRITE_REG(hw, IXGBE_GCR_EXT, gcr_ext); + IXGBE_WRITE_REG(hw, IXGBE_HLREG0, hlreg0); +} + +static const u8 ixgbe_emc_temp_data[4] = { + IXGBE_EMC_INTERNAL_DATA, + IXGBE_EMC_DIODE1_DATA, + IXGBE_EMC_DIODE2_DATA, + IXGBE_EMC_DIODE3_DATA +}; +static const u8 ixgbe_emc_therm_limit[4] = { + IXGBE_EMC_INTERNAL_THERM_LIMIT, + IXGBE_EMC_DIODE1_THERM_LIMIT, + IXGBE_EMC_DIODE2_THERM_LIMIT, + IXGBE_EMC_DIODE3_THERM_LIMIT +}; + +/** + * ixgbe_get_thermal_sensor_data - Gathers thermal sensor data + * @hw: pointer to hardware structure + * @data: pointer to the thermal sensor data structure + * + * Returns the thermal sensor data structure + **/ +s32 ixgbe_get_thermal_sensor_data_generic(struct ixgbe_hw *hw) +{ + s32 status = 0; + u16 ets_offset; + u16 ets_cfg; + u16 ets_sensor; + u8 num_sensors; + u8 sensor_index; + u8 sensor_location; + u8 i; + struct ixgbe_thermal_sensor_data *data = &hw->mac.thermal_sensor_data; + + /* Only support thermal sensors attached to 82599 physical port 0 */ + if ((hw->mac.type != ixgbe_mac_82599EB) || + (IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_LAN_ID_1)) { + status = IXGBE_NOT_IMPLEMENTED; + goto out; + } + + status = hw->eeprom.ops.read(hw, IXGBE_ETS_CFG, &ets_offset); + if (status) + goto out; + + if ((ets_offset == 0x0000) || (ets_offset == 0xFFFF)) { + status = IXGBE_NOT_IMPLEMENTED; + goto out; + } + + status = hw->eeprom.ops.read(hw, ets_offset, &ets_cfg); + if (status) + goto out; + + if (((ets_cfg & IXGBE_ETS_TYPE_MASK) >> IXGBE_ETS_TYPE_SHIFT) + != IXGBE_ETS_TYPE_EMC) { + status = IXGBE_NOT_IMPLEMENTED; + goto out; + } + + num_sensors = (ets_cfg & IXGBE_ETS_NUM_SENSORS_MASK); + if (num_sensors > IXGBE_MAX_SENSORS) + num_sensors = IXGBE_MAX_SENSORS; + + for (i = 0; i < num_sensors; i++) { + status = hw->eeprom.ops.read(hw, (ets_offset + 1 + i), + &ets_sensor); + if (status) + goto out; + + sensor_index = ((ets_sensor & IXGBE_ETS_DATA_INDEX_MASK) >> + IXGBE_ETS_DATA_INDEX_SHIFT); + sensor_location = ((ets_sensor & IXGBE_ETS_DATA_LOC_MASK) >> + IXGBE_ETS_DATA_LOC_SHIFT); + + if (sensor_location != 0) { + status = hw->phy.ops.read_i2c_byte(hw, + ixgbe_emc_temp_data[sensor_index], + IXGBE_I2C_THERMAL_SENSOR_ADDR, + &data->sensor[i].temp); + if (status) + goto out; + } + } +out: + return status; +} + +/** + * ixgbe_init_thermal_sensor_thresh_generic - Inits thermal sensor thresholds + * @hw: pointer to hardware structure + * + * Inits the thermal sensor thresholds according to the NVM map + * and save off the threshold and location values into mac.thermal_sensor_data + **/ +s32 ixgbe_init_thermal_sensor_thresh_generic(struct ixgbe_hw *hw) +{ + s32 status = 0; + u16 ets_offset; + u16 ets_cfg; + u16 ets_sensor; + u8 low_thresh_delta; + u8 num_sensors; + u8 sensor_index; + u8 sensor_location; + u8 therm_limit; + u8 i; + struct ixgbe_thermal_sensor_data *data = &hw->mac.thermal_sensor_data; + + memset(data, 0, sizeof(struct ixgbe_thermal_sensor_data)); + + /* Only support thermal sensors attached to 82599 physical port 0 */ + if ((hw->mac.type != ixgbe_mac_82599EB) || + (IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_LAN_ID_1)) + return IXGBE_NOT_IMPLEMENTED; + + hw->eeprom.ops.read(hw, IXGBE_ETS_CFG, &ets_offset); + if ((ets_offset == 0x0000) || (ets_offset == 0xFFFF)) + return IXGBE_NOT_IMPLEMENTED; + + hw->eeprom.ops.read(hw, ets_offset, &ets_cfg); + if (((ets_cfg & IXGBE_ETS_TYPE_MASK) >> IXGBE_ETS_TYPE_SHIFT) + != IXGBE_ETS_TYPE_EMC) + return IXGBE_NOT_IMPLEMENTED; + + low_thresh_delta = ((ets_cfg & IXGBE_ETS_LTHRES_DELTA_MASK) >> + IXGBE_ETS_LTHRES_DELTA_SHIFT); + num_sensors = (ets_cfg & IXGBE_ETS_NUM_SENSORS_MASK); + + for (i = 0; i < num_sensors; i++) { + hw->eeprom.ops.read(hw, (ets_offset + 1 + i), &ets_sensor); + sensor_index = ((ets_sensor & IXGBE_ETS_DATA_INDEX_MASK) >> + IXGBE_ETS_DATA_INDEX_SHIFT); + sensor_location = ((ets_sensor & IXGBE_ETS_DATA_LOC_MASK) >> + IXGBE_ETS_DATA_LOC_SHIFT); + therm_limit = ets_sensor & IXGBE_ETS_DATA_HTHRESH_MASK; + + hw->phy.ops.write_i2c_byte(hw, + ixgbe_emc_therm_limit[sensor_index], + IXGBE_I2C_THERMAL_SENSOR_ADDR, therm_limit); + + if ((i < IXGBE_MAX_SENSORS) && (sensor_location != 0)) { + data->sensor[i].location = sensor_location; + data->sensor[i].caution_thresh = therm_limit; + data->sensor[i].max_op_thresh = therm_limit - + low_thresh_delta; + } + } + return status; +} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.h new file mode 100644 index 00000000..9bd6f534 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_common.h @@ -0,0 +1,140 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _IXGBE_COMMON_H_ +#define _IXGBE_COMMON_H_ + +#include "ixgbe_type.h" + +u16 ixgbe_get_pcie_msix_count_generic(struct ixgbe_hw *hw); + +s32 ixgbe_init_ops_generic(struct ixgbe_hw *hw); +s32 ixgbe_init_hw_generic(struct ixgbe_hw *hw); +s32 ixgbe_start_hw_generic(struct ixgbe_hw *hw); +s32 ixgbe_start_hw_gen2(struct ixgbe_hw *hw); +s32 ixgbe_clear_hw_cntrs_generic(struct ixgbe_hw *hw); +s32 ixgbe_read_pba_string_generic(struct ixgbe_hw *hw, u8 *pba_num, + u32 pba_num_size); +s32 ixgbe_get_mac_addr_generic(struct ixgbe_hw *hw, u8 *mac_addr); +s32 ixgbe_get_bus_info_generic(struct ixgbe_hw *hw); +void ixgbe_set_lan_id_multi_port_pcie(struct ixgbe_hw *hw); +s32 ixgbe_stop_adapter_generic(struct ixgbe_hw *hw); + +s32 ixgbe_led_on_generic(struct ixgbe_hw *hw, u32 index); +s32 ixgbe_led_off_generic(struct ixgbe_hw *hw, u32 index); + +s32 ixgbe_init_eeprom_params_generic(struct ixgbe_hw *hw); +s32 ixgbe_write_eeprom_generic(struct ixgbe_hw *hw, u16 offset, u16 data); +s32 ixgbe_write_eeprom_buffer_bit_bang_generic(struct ixgbe_hw *hw, u16 offset, + u16 words, u16 *data); +s32 ixgbe_read_eerd_generic(struct ixgbe_hw *hw, u16 offset, u16 *data); +s32 ixgbe_read_eerd_buffer_generic(struct ixgbe_hw *hw, u16 offset, + u16 words, u16 *data); +s32 ixgbe_write_eewr_generic(struct ixgbe_hw *hw, u16 offset, u16 data); +s32 ixgbe_write_eewr_buffer_generic(struct ixgbe_hw *hw, u16 offset, + u16 words, u16 *data); +s32 ixgbe_read_eeprom_bit_bang_generic(struct ixgbe_hw *hw, u16 offset, + u16 *data); +s32 ixgbe_read_eeprom_buffer_bit_bang_generic(struct ixgbe_hw *hw, u16 offset, + u16 words, u16 *data); +u16 ixgbe_calc_eeprom_checksum_generic(struct ixgbe_hw *hw); +s32 ixgbe_validate_eeprom_checksum_generic(struct ixgbe_hw *hw, + u16 *checksum_val); +s32 ixgbe_update_eeprom_checksum_generic(struct ixgbe_hw *hw); +s32 ixgbe_poll_eerd_eewr_done(struct ixgbe_hw *hw, u32 ee_reg); + +s32 ixgbe_set_rar_generic(struct ixgbe_hw *hw, u32 index, u8 *addr, u32 vmdq, + u32 enable_addr); +s32 ixgbe_clear_rar_generic(struct ixgbe_hw *hw, u32 index); +s32 ixgbe_init_rx_addrs_generic(struct ixgbe_hw *hw); +s32 ixgbe_update_mc_addr_list_generic(struct ixgbe_hw *hw, u8 *mc_addr_list, + u32 mc_addr_count, + ixgbe_mc_addr_itr func, bool clear); +s32 ixgbe_update_uc_addr_list_generic(struct ixgbe_hw *hw, u8 *addr_list, + u32 addr_count, ixgbe_mc_addr_itr func); +s32 ixgbe_enable_mc_generic(struct ixgbe_hw *hw); +s32 ixgbe_disable_mc_generic(struct ixgbe_hw *hw); +s32 ixgbe_enable_rx_dma_generic(struct ixgbe_hw *hw, u32 regval); +s32 ixgbe_disable_sec_rx_path_generic(struct ixgbe_hw *hw); +s32 ixgbe_enable_sec_rx_path_generic(struct ixgbe_hw *hw); + +s32 ixgbe_fc_enable_generic(struct ixgbe_hw *hw); +void ixgbe_fc_autoneg(struct ixgbe_hw *hw); + +s32 ixgbe_validate_mac_addr(u8 *mac_addr); +s32 ixgbe_acquire_swfw_sync(struct ixgbe_hw *hw, u16 mask); +void ixgbe_release_swfw_sync(struct ixgbe_hw *hw, u16 mask); +s32 ixgbe_disable_pcie_master(struct ixgbe_hw *hw); + +s32 ixgbe_blink_led_start_generic(struct ixgbe_hw *hw, u32 index); +s32 ixgbe_blink_led_stop_generic(struct ixgbe_hw *hw, u32 index); + +s32 ixgbe_get_san_mac_addr_generic(struct ixgbe_hw *hw, u8 *san_mac_addr); +s32 ixgbe_set_san_mac_addr_generic(struct ixgbe_hw *hw, u8 *san_mac_addr); + +s32 ixgbe_set_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq); +s32 ixgbe_set_vmdq_san_mac_generic(struct ixgbe_hw *hw, u32 vmdq); +s32 ixgbe_clear_vmdq_generic(struct ixgbe_hw *hw, u32 rar, u32 vmdq); +s32 ixgbe_insert_mac_addr_generic(struct ixgbe_hw *hw, u8 *addr, u32 vmdq); +s32 ixgbe_init_uta_tables_generic(struct ixgbe_hw *hw); +s32 ixgbe_set_vfta_generic(struct ixgbe_hw *hw, u32 vlan, + u32 vind, bool vlan_on); +s32 ixgbe_set_vlvf_generic(struct ixgbe_hw *hw, u32 vlan, u32 vind, + bool vlan_on, bool *vfta_changed); +s32 ixgbe_clear_vfta_generic(struct ixgbe_hw *hw); +s32 ixgbe_find_vlvf_slot(struct ixgbe_hw *hw, u32 vlan); + +s32 ixgbe_check_mac_link_generic(struct ixgbe_hw *hw, + ixgbe_link_speed *speed, + bool *link_up, bool link_up_wait_to_complete); + +s32 ixgbe_get_wwn_prefix_generic(struct ixgbe_hw *hw, u16 *wwnn_prefix, + u16 *wwpn_prefix); + +s32 ixgbe_get_fcoe_boot_status_generic(struct ixgbe_hw *hw, u16 *bs); +void ixgbe_set_mac_anti_spoofing(struct ixgbe_hw *hw, bool enable, int pf); +void ixgbe_set_vlan_anti_spoofing(struct ixgbe_hw *hw, bool enable, int vf); +s32 ixgbe_get_device_caps_generic(struct ixgbe_hw *hw, u16 *device_caps); +void ixgbe_set_rxpba_generic(struct ixgbe_hw *hw, int num_pb, u32 headroom, + int strategy); +s32 ixgbe_set_fw_drv_ver_generic(struct ixgbe_hw *hw, u8 maj, u8 min, + u8 build, u8 ver); +void ixgbe_clear_tx_pending(struct ixgbe_hw *hw); + +#define IXGBE_I2C_THERMAL_SENSOR_ADDR 0xF8 +#define IXGBE_EMC_INTERNAL_DATA 0x00 +#define IXGBE_EMC_INTERNAL_THERM_LIMIT 0x20 +#define IXGBE_EMC_DIODE1_DATA 0x01 +#define IXGBE_EMC_DIODE1_THERM_LIMIT 0x19 +#define IXGBE_EMC_DIODE2_DATA 0x23 +#define IXGBE_EMC_DIODE2_THERM_LIMIT 0x1A +#define IXGBE_EMC_DIODE3_DATA 0x2A +#define IXGBE_EMC_DIODE3_THERM_LIMIT 0x30 + +s32 ixgbe_get_thermal_sensor_data_generic(struct ixgbe_hw *hw); +s32 ixgbe_init_thermal_sensor_thresh_generic(struct ixgbe_hw *hw); +#endif /* IXGBE_COMMON */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h new file mode 100644 index 00000000..a6690451 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_dcb.h @@ -0,0 +1,168 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _IXGBE_DCB_H_ +#define _IXGBE_DCB_H_ + + +#include "ixgbe_type.h" + +/* DCB defines */ +/* DCB credit calculation defines */ +#define IXGBE_DCB_CREDIT_QUANTUM 64 +#define IXGBE_DCB_MAX_CREDIT_REFILL 200 /* 200 * 64B = 12800B */ +#define IXGBE_DCB_MAX_TSO_SIZE (32 * 1024) /* Max TSO pkt size in DCB*/ +#define IXGBE_DCB_MAX_CREDIT (2 * IXGBE_DCB_MAX_CREDIT_REFILL) + +/* 513 for 32KB TSO packet */ +#define IXGBE_DCB_MIN_TSO_CREDIT \ + ((IXGBE_DCB_MAX_TSO_SIZE / IXGBE_DCB_CREDIT_QUANTUM) + 1) + +/* DCB configuration defines */ +#define IXGBE_DCB_MAX_USER_PRIORITY 8 +#define IXGBE_DCB_MAX_BW_GROUP 8 +#define IXGBE_DCB_BW_PERCENT 100 + +#define IXGBE_DCB_TX_CONFIG 0 +#define IXGBE_DCB_RX_CONFIG 1 + +/* DCB capability defines */ +#define IXGBE_DCB_PG_SUPPORT 0x00000001 +#define IXGBE_DCB_PFC_SUPPORT 0x00000002 +#define IXGBE_DCB_BCN_SUPPORT 0x00000004 +#define IXGBE_DCB_UP2TC_SUPPORT 0x00000008 +#define IXGBE_DCB_GSP_SUPPORT 0x00000010 + +struct ixgbe_dcb_support { + u32 capabilities; /* DCB capabilities */ + + /* Each bit represents a number of TCs configurable in the hw. + * If 8 traffic classes can be configured, the value is 0x80. */ + u8 traffic_classes; + u8 pfc_traffic_classes; +}; + +enum ixgbe_dcb_tsa { + ixgbe_dcb_tsa_ets = 0, + ixgbe_dcb_tsa_group_strict_cee, + ixgbe_dcb_tsa_strict +}; + +/* Traffic class bandwidth allocation per direction */ +struct ixgbe_dcb_tc_path { + u8 bwg_id; /* Bandwidth Group (BWG) ID */ + u8 bwg_percent; /* % of BWG's bandwidth */ + u8 link_percent; /* % of link bandwidth */ + u8 up_to_tc_bitmap; /* User Priority to Traffic Class mapping */ + u16 data_credits_refill; /* Credit refill amount in 64B granularity */ + u16 data_credits_max; /* Max credits for a configured packet buffer + * in 64B granularity.*/ + enum ixgbe_dcb_tsa tsa; /* Link or Group Strict Priority */ +}; + +enum ixgbe_dcb_pfc { + ixgbe_dcb_pfc_disabled = 0, + ixgbe_dcb_pfc_enabled, + ixgbe_dcb_pfc_enabled_txonly, + ixgbe_dcb_pfc_enabled_rxonly +}; + +/* Traffic class configuration */ +struct ixgbe_dcb_tc_config { + struct ixgbe_dcb_tc_path path[2]; /* One each for Tx/Rx */ + enum ixgbe_dcb_pfc pfc; /* Class based flow control setting */ + + u16 desc_credits_max; /* For Tx Descriptor arbitration */ + u8 tc; /* Traffic class (TC) */ +}; + +enum ixgbe_dcb_pba { + /* PBA[0-7] each use 64KB FIFO */ + ixgbe_dcb_pba_equal = PBA_STRATEGY_EQUAL, + /* PBA[0-3] each use 80KB, PBA[4-7] each use 48KB */ + ixgbe_dcb_pba_80_48 = PBA_STRATEGY_WEIGHTED +}; + +struct ixgbe_dcb_num_tcs { + u8 pg_tcs; + u8 pfc_tcs; +}; + +struct ixgbe_dcb_config { + struct ixgbe_dcb_tc_config tc_config[IXGBE_DCB_MAX_TRAFFIC_CLASS]; + struct ixgbe_dcb_support support; + struct ixgbe_dcb_num_tcs num_tcs; + u8 bw_percentage[2][IXGBE_DCB_MAX_BW_GROUP]; /* One each for Tx/Rx */ + bool pfc_mode_enable; + bool round_robin_enable; + + enum ixgbe_dcb_pba rx_pba_cfg; + + u32 dcb_cfg_version; /* Not used...OS-specific? */ + u32 link_speed; /* For bandwidth allocation validation purpose */ + bool vt_mode; +}; + +/* DCB driver APIs */ + +/* DCB rule checking */ +s32 ixgbe_dcb_check_config_cee(struct ixgbe_dcb_config *); + +/* DCB credits calculation */ +s32 ixgbe_dcb_calculate_tc_credits(u8 *, u16 *, u16 *, int); +s32 ixgbe_dcb_calculate_tc_credits_cee(struct ixgbe_hw *, + struct ixgbe_dcb_config *, u32, u8); + +/* DCB PFC */ +s32 ixgbe_dcb_config_pfc(struct ixgbe_hw *, u8, u8 *); +s32 ixgbe_dcb_config_pfc_cee(struct ixgbe_hw *, struct ixgbe_dcb_config *); + +/* DCB stats */ +s32 ixgbe_dcb_config_tc_stats(struct ixgbe_hw *); +s32 ixgbe_dcb_get_tc_stats(struct ixgbe_hw *, struct ixgbe_hw_stats *, u8); +s32 ixgbe_dcb_get_pfc_stats(struct ixgbe_hw *, struct ixgbe_hw_stats *, u8); + +/* DCB config arbiters */ +s32 ixgbe_dcb_config_tx_desc_arbiter_cee(struct ixgbe_hw *, + struct ixgbe_dcb_config *); +s32 ixgbe_dcb_config_tx_data_arbiter_cee(struct ixgbe_hw *, + struct ixgbe_dcb_config *); +s32 ixgbe_dcb_config_rx_arbiter_cee(struct ixgbe_hw *, + struct ixgbe_dcb_config *); + +/* DCB unpack routines */ +void ixgbe_dcb_unpack_pfc_cee(struct ixgbe_dcb_config *, u8 *, u8 *); +void ixgbe_dcb_unpack_refill_cee(struct ixgbe_dcb_config *, int, u16 *); +void ixgbe_dcb_unpack_max_cee(struct ixgbe_dcb_config *, u16 *); +void ixgbe_dcb_unpack_bwgid_cee(struct ixgbe_dcb_config *, int, u8 *); +void ixgbe_dcb_unpack_tsa_cee(struct ixgbe_dcb_config *, int, u8 *); +void ixgbe_dcb_unpack_map_cee(struct ixgbe_dcb_config *, int, u8 *); + +/* DCB initialization */ +s32 ixgbe_dcb_hw_config(struct ixgbe_hw *, u16 *, u16 *, u8 *, u8 *, u8 *); +s32 ixgbe_dcb_hw_config_cee(struct ixgbe_hw *, struct ixgbe_dcb_config *); +#endif /* _IXGBE_DCB_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c new file mode 100644 index 00000000..11472bd3 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_ethtool.c @@ -0,0 +1,2901 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +/* ethtool support for ixgbe */ + +#include +#include +#include +#include +#include +#include +#include +#ifdef SIOCETHTOOL +#include + +#include "ixgbe.h" + +#ifndef ETH_GSTRING_LEN +#define ETH_GSTRING_LEN 32 +#endif + +#define IXGBE_ALL_RAR_ENTRIES 16 + +#ifdef ETHTOOL_OPS_COMPAT +#include "kcompat_ethtool.c" +#endif +#ifdef ETHTOOL_GSTATS +struct ixgbe_stats { + char stat_string[ETH_GSTRING_LEN]; + int sizeof_stat; + int stat_offset; +}; + +#define IXGBE_NETDEV_STAT(_net_stat) { \ + .stat_string = #_net_stat, \ + .sizeof_stat = FIELD_SIZEOF(struct net_device_stats, _net_stat), \ + .stat_offset = offsetof(struct net_device_stats, _net_stat) \ +} +static const struct ixgbe_stats ixgbe_gstrings_net_stats[] = { + IXGBE_NETDEV_STAT(rx_packets), + IXGBE_NETDEV_STAT(tx_packets), + IXGBE_NETDEV_STAT(rx_bytes), + IXGBE_NETDEV_STAT(tx_bytes), + IXGBE_NETDEV_STAT(rx_errors), + IXGBE_NETDEV_STAT(tx_errors), + IXGBE_NETDEV_STAT(rx_dropped), + IXGBE_NETDEV_STAT(tx_dropped), + IXGBE_NETDEV_STAT(multicast), + IXGBE_NETDEV_STAT(collisions), + IXGBE_NETDEV_STAT(rx_over_errors), + IXGBE_NETDEV_STAT(rx_crc_errors), + IXGBE_NETDEV_STAT(rx_frame_errors), + IXGBE_NETDEV_STAT(rx_fifo_errors), + IXGBE_NETDEV_STAT(rx_missed_errors), + IXGBE_NETDEV_STAT(tx_aborted_errors), + IXGBE_NETDEV_STAT(tx_carrier_errors), + IXGBE_NETDEV_STAT(tx_fifo_errors), + IXGBE_NETDEV_STAT(tx_heartbeat_errors), +}; + +#define IXGBE_STAT(_name, _stat) { \ + .stat_string = _name, \ + .sizeof_stat = FIELD_SIZEOF(struct ixgbe_adapter, _stat), \ + .stat_offset = offsetof(struct ixgbe_adapter, _stat) \ +} +static struct ixgbe_stats ixgbe_gstrings_stats[] = { + IXGBE_STAT("rx_pkts_nic", stats.gprc), + IXGBE_STAT("tx_pkts_nic", stats.gptc), + IXGBE_STAT("rx_bytes_nic", stats.gorc), + IXGBE_STAT("tx_bytes_nic", stats.gotc), + IXGBE_STAT("lsc_int", lsc_int), + IXGBE_STAT("tx_busy", tx_busy), + IXGBE_STAT("non_eop_descs", non_eop_descs), +#ifndef CONFIG_IXGBE_NAPI + IXGBE_STAT("rx_dropped_backlog", rx_dropped_backlog), +#endif + IXGBE_STAT("broadcast", stats.bprc), + IXGBE_STAT("rx_no_buffer_count", stats.rnbc[0]) , + IXGBE_STAT("tx_timeout_count", tx_timeout_count), + IXGBE_STAT("tx_restart_queue", restart_queue), + IXGBE_STAT("rx_long_length_errors", stats.roc), + IXGBE_STAT("rx_short_length_errors", stats.ruc), + IXGBE_STAT("tx_flow_control_xon", stats.lxontxc), + IXGBE_STAT("rx_flow_control_xon", stats.lxonrxc), + IXGBE_STAT("tx_flow_control_xoff", stats.lxofftxc), + IXGBE_STAT("rx_flow_control_xoff", stats.lxoffrxc), + IXGBE_STAT("rx_csum_offload_errors", hw_csum_rx_error), + IXGBE_STAT("alloc_rx_page_failed", alloc_rx_page_failed), + IXGBE_STAT("alloc_rx_buff_failed", alloc_rx_buff_failed), +#ifndef IXGBE_NO_LRO + IXGBE_STAT("lro_aggregated", lro_stats.coal), + IXGBE_STAT("lro_flushed", lro_stats.flushed), +#endif /* IXGBE_NO_LRO */ + IXGBE_STAT("rx_no_dma_resources", hw_rx_no_dma_resources), + IXGBE_STAT("hw_rsc_aggregated", rsc_total_count), + IXGBE_STAT("hw_rsc_flushed", rsc_total_flush), +#ifdef HAVE_TX_MQ + IXGBE_STAT("fdir_match", stats.fdirmatch), + IXGBE_STAT("fdir_miss", stats.fdirmiss), + IXGBE_STAT("fdir_overflow", fdir_overflow), +#endif /* HAVE_TX_MQ */ +#ifdef IXGBE_FCOE + IXGBE_STAT("fcoe_bad_fccrc", stats.fccrc), + IXGBE_STAT("fcoe_last_errors", stats.fclast), + IXGBE_STAT("rx_fcoe_dropped", stats.fcoerpdc), + IXGBE_STAT("rx_fcoe_packets", stats.fcoeprc), + IXGBE_STAT("rx_fcoe_dwords", stats.fcoedwrc), + IXGBE_STAT("fcoe_noddp", stats.fcoe_noddp), + IXGBE_STAT("fcoe_noddp_ext_buff", stats.fcoe_noddp_ext_buff), + IXGBE_STAT("tx_fcoe_packets", stats.fcoeptc), + IXGBE_STAT("tx_fcoe_dwords", stats.fcoedwtc), +#endif /* IXGBE_FCOE */ + IXGBE_STAT("os2bmc_rx_by_bmc", stats.o2bgptc), + IXGBE_STAT("os2bmc_tx_by_bmc", stats.b2ospc), + IXGBE_STAT("os2bmc_tx_by_host", stats.o2bspc), + IXGBE_STAT("os2bmc_rx_by_host", stats.b2ogprc), +}; + +#define IXGBE_QUEUE_STATS_LEN \ + ((((struct ixgbe_adapter *)netdev_priv(netdev))->num_tx_queues + \ + ((struct ixgbe_adapter *)netdev_priv(netdev))->num_rx_queues) * \ + (sizeof(struct ixgbe_queue_stats) / sizeof(u64))) +#define IXGBE_GLOBAL_STATS_LEN ARRAY_SIZE(ixgbe_gstrings_stats) +#define IXGBE_NETDEV_STATS_LEN ARRAY_SIZE(ixgbe_gstrings_net_stats) +#define IXGBE_PB_STATS_LEN ( \ + (((struct ixgbe_adapter *)netdev_priv(netdev))->flags & \ + IXGBE_FLAG_DCB_ENABLED) ? \ + (sizeof(((struct ixgbe_adapter *)0)->stats.pxonrxc) + \ + sizeof(((struct ixgbe_adapter *)0)->stats.pxontxc) + \ + sizeof(((struct ixgbe_adapter *)0)->stats.pxoffrxc) + \ + sizeof(((struct ixgbe_adapter *)0)->stats.pxofftxc)) \ + / sizeof(u64) : 0) +#define IXGBE_VF_STATS_LEN \ + ((((struct ixgbe_adapter *)netdev_priv(netdev))->num_vfs) * \ + (sizeof(struct vf_stats) / sizeof(u64))) +#define IXGBE_STATS_LEN (IXGBE_GLOBAL_STATS_LEN + \ + IXGBE_NETDEV_STATS_LEN + \ + IXGBE_PB_STATS_LEN + \ + IXGBE_QUEUE_STATS_LEN + \ + IXGBE_VF_STATS_LEN) + +#endif /* ETHTOOL_GSTATS */ +#ifdef ETHTOOL_TEST +static const char ixgbe_gstrings_test[][ETH_GSTRING_LEN] = { + "Register test (offline)", "Eeprom test (offline)", + "Interrupt test (offline)", "Loopback test (offline)", + "Link test (on/offline)" +}; +#define IXGBE_TEST_LEN (sizeof(ixgbe_gstrings_test) / ETH_GSTRING_LEN) +#endif /* ETHTOOL_TEST */ + +int ixgbe_get_settings(struct net_device *netdev, + struct ethtool_cmd *ecmd) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_hw *hw = &adapter->hw; + u32 link_speed = 0; + bool link_up; + + ecmd->supported = SUPPORTED_10000baseT_Full; + ecmd->autoneg = AUTONEG_ENABLE; + ecmd->transceiver = XCVR_EXTERNAL; + if ((hw->phy.media_type == ixgbe_media_type_copper) || + (hw->phy.multispeed_fiber)) { + ecmd->supported |= (SUPPORTED_1000baseT_Full | + SUPPORTED_Autoneg); + switch (hw->mac.type) { + case ixgbe_mac_X540: + ecmd->supported |= SUPPORTED_100baseT_Full; + break; + default: + break; + } + + ecmd->advertising = ADVERTISED_Autoneg; + if (hw->phy.autoneg_advertised) { + if (hw->phy.autoneg_advertised & + IXGBE_LINK_SPEED_100_FULL) + ecmd->advertising |= ADVERTISED_100baseT_Full; + if (hw->phy.autoneg_advertised & + IXGBE_LINK_SPEED_10GB_FULL) + ecmd->advertising |= ADVERTISED_10000baseT_Full; + if (hw->phy.autoneg_advertised & + IXGBE_LINK_SPEED_1GB_FULL) + ecmd->advertising |= ADVERTISED_1000baseT_Full; + } else { + /* + * Default advertised modes in case + * phy.autoneg_advertised isn't set. + */ + ecmd->advertising |= (ADVERTISED_10000baseT_Full | + ADVERTISED_1000baseT_Full); + if (hw->mac.type == ixgbe_mac_X540) + ecmd->advertising |= ADVERTISED_100baseT_Full; + } + + if (hw->phy.media_type == ixgbe_media_type_copper) { + ecmd->supported |= SUPPORTED_TP; + ecmd->advertising |= ADVERTISED_TP; + ecmd->port = PORT_TP; + } else { + ecmd->supported |= SUPPORTED_FIBRE; + ecmd->advertising |= ADVERTISED_FIBRE; + ecmd->port = PORT_FIBRE; + } + } else if (hw->phy.media_type == ixgbe_media_type_backplane) { + /* Set as FIBRE until SERDES defined in kernel */ + if (hw->device_id == IXGBE_DEV_ID_82598_BX) { + ecmd->supported = (SUPPORTED_1000baseT_Full | + SUPPORTED_FIBRE); + ecmd->advertising = (ADVERTISED_1000baseT_Full | + ADVERTISED_FIBRE); + ecmd->port = PORT_FIBRE; + ecmd->autoneg = AUTONEG_DISABLE; + } else if ((hw->device_id == IXGBE_DEV_ID_82599_COMBO_BACKPLANE) + || (hw->device_id == IXGBE_DEV_ID_82599_KX4_MEZZ)) { + ecmd->supported |= (SUPPORTED_1000baseT_Full | + SUPPORTED_Autoneg | + SUPPORTED_FIBRE); + ecmd->advertising = (ADVERTISED_10000baseT_Full | + ADVERTISED_1000baseT_Full | + ADVERTISED_Autoneg | + ADVERTISED_FIBRE); + ecmd->port = PORT_FIBRE; + } else { + ecmd->supported |= (SUPPORTED_1000baseT_Full | + SUPPORTED_FIBRE); + ecmd->advertising = (ADVERTISED_10000baseT_Full | + ADVERTISED_1000baseT_Full | + ADVERTISED_FIBRE); + ecmd->port = PORT_FIBRE; + } + } else { + ecmd->supported |= SUPPORTED_FIBRE; + ecmd->advertising = (ADVERTISED_10000baseT_Full | + ADVERTISED_FIBRE); + ecmd->port = PORT_FIBRE; + ecmd->autoneg = AUTONEG_DISABLE; + } + +#ifdef HAVE_ETHTOOL_SFP_DISPLAY_PORT + /* Get PHY type */ + switch (adapter->hw.phy.type) { + case ixgbe_phy_tn: + case ixgbe_phy_aq: + case ixgbe_phy_cu_unknown: + /* Copper 10G-BASET */ + ecmd->port = PORT_TP; + break; + case ixgbe_phy_qt: + ecmd->port = PORT_FIBRE; + break; + case ixgbe_phy_nl: + case ixgbe_phy_sfp_passive_tyco: + case ixgbe_phy_sfp_passive_unknown: + case ixgbe_phy_sfp_ftl: + case ixgbe_phy_sfp_avago: + case ixgbe_phy_sfp_intel: + case ixgbe_phy_sfp_unknown: + switch (adapter->hw.phy.sfp_type) { + /* SFP+ devices, further checking needed */ + case ixgbe_sfp_type_da_cu: + case ixgbe_sfp_type_da_cu_core0: + case ixgbe_sfp_type_da_cu_core1: + ecmd->port = PORT_DA; + break; + case ixgbe_sfp_type_sr: + case ixgbe_sfp_type_lr: + case ixgbe_sfp_type_srlr_core0: + case ixgbe_sfp_type_srlr_core1: + ecmd->port = PORT_FIBRE; + break; + case ixgbe_sfp_type_not_present: + ecmd->port = PORT_NONE; + break; + case ixgbe_sfp_type_1g_cu_core0: + case ixgbe_sfp_type_1g_cu_core1: + ecmd->port = PORT_TP; + ecmd->supported = SUPPORTED_TP; + ecmd->advertising = (ADVERTISED_1000baseT_Full | + ADVERTISED_TP); + break; + case ixgbe_sfp_type_1g_sx_core0: + case ixgbe_sfp_type_1g_sx_core1: + ecmd->port = PORT_FIBRE; + ecmd->supported = SUPPORTED_FIBRE; + ecmd->advertising = (ADVERTISED_1000baseT_Full | + ADVERTISED_FIBRE); + break; + case ixgbe_sfp_type_unknown: + default: + ecmd->port = PORT_OTHER; + break; + } + break; + case ixgbe_phy_xaui: + ecmd->port = PORT_NONE; + break; + case ixgbe_phy_unknown: + case ixgbe_phy_generic: + case ixgbe_phy_sfp_unsupported: + default: + ecmd->port = PORT_OTHER; + break; + } +#endif + + if (!in_interrupt()) { + hw->mac.ops.check_link(hw, &link_speed, &link_up, false); + } else { + /* + * this case is a special workaround for RHEL5 bonding + * that calls this routine from interrupt context + */ + link_speed = adapter->link_speed; + link_up = adapter->link_up; + } + + if (link_up) { + switch (link_speed) { + case IXGBE_LINK_SPEED_10GB_FULL: + ecmd->speed = SPEED_10000; + break; + case IXGBE_LINK_SPEED_1GB_FULL: + ecmd->speed = SPEED_1000; + break; + case IXGBE_LINK_SPEED_100_FULL: + ecmd->speed = SPEED_100; + break; + default: + break; + } + ecmd->duplex = DUPLEX_FULL; + } else { + ecmd->speed = -1; + ecmd->duplex = -1; + } + + return 0; +} + +static int ixgbe_set_settings(struct net_device *netdev, + struct ethtool_cmd *ecmd) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_hw *hw = &adapter->hw; + u32 advertised, old; + s32 err = 0; + + if ((hw->phy.media_type == ixgbe_media_type_copper) || + (hw->phy.multispeed_fiber)) { + /* + * this function does not support duplex forcing, but can + * limit the advertising of the adapter to the specified speed + */ + if (ecmd->autoneg == AUTONEG_DISABLE) + return -EINVAL; + + if (ecmd->advertising & ~ecmd->supported) + return -EINVAL; + + old = hw->phy.autoneg_advertised; + advertised = 0; + if (ecmd->advertising & ADVERTISED_10000baseT_Full) + advertised |= IXGBE_LINK_SPEED_10GB_FULL; + + if (ecmd->advertising & ADVERTISED_1000baseT_Full) + advertised |= IXGBE_LINK_SPEED_1GB_FULL; + + if (ecmd->advertising & ADVERTISED_100baseT_Full) + advertised |= IXGBE_LINK_SPEED_100_FULL; + + if (old == advertised) + return err; + /* this sets the link speed and restarts auto-neg */ + hw->mac.autotry_restart = true; + err = hw->mac.ops.setup_link(hw, advertised, true, true); + if (err) { + e_info(probe, "setup link failed with code %d\n", err); + hw->mac.ops.setup_link(hw, old, true, true); + } + } + return err; +} + +static void ixgbe_get_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pause) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_hw *hw = &adapter->hw; + + if (hw->fc.disable_fc_autoneg) + pause->autoneg = 0; + else + pause->autoneg = 1; + + if (hw->fc.current_mode == ixgbe_fc_rx_pause) { + pause->rx_pause = 1; + } else if (hw->fc.current_mode == ixgbe_fc_tx_pause) { + pause->tx_pause = 1; + } else if (hw->fc.current_mode == ixgbe_fc_full) { + pause->rx_pause = 1; + pause->tx_pause = 1; + } +} + +static int ixgbe_set_pauseparam(struct net_device *netdev, + struct ethtool_pauseparam *pause) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_hw *hw = &adapter->hw; + struct ixgbe_fc_info fc = hw->fc; + + /* 82598 does no support link flow control with DCB enabled */ + if ((hw->mac.type == ixgbe_mac_82598EB) && + (adapter->flags & IXGBE_FLAG_DCB_ENABLED)) + return -EINVAL; + + fc.disable_fc_autoneg = (pause->autoneg != AUTONEG_ENABLE); + + if ((pause->rx_pause && pause->tx_pause) || pause->autoneg) + fc.requested_mode = ixgbe_fc_full; + else if (pause->rx_pause) + fc.requested_mode = ixgbe_fc_rx_pause; + else if (pause->tx_pause) + fc.requested_mode = ixgbe_fc_tx_pause; + else + fc.requested_mode = ixgbe_fc_none; + + /* if the thing changed then we'll update and use new autoneg */ + if (memcmp(&fc, &hw->fc, sizeof(struct ixgbe_fc_info))) { + hw->fc = fc; + if (netif_running(netdev)) + ixgbe_reinit_locked(adapter); + else + ixgbe_reset(adapter); + } + + return 0; +} + +static u32 ixgbe_get_msglevel(struct net_device *netdev) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + return adapter->msg_enable; +} + +static void ixgbe_set_msglevel(struct net_device *netdev, u32 data) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + adapter->msg_enable = data; +} + +static int ixgbe_get_regs_len(struct net_device *netdev) +{ +#define IXGBE_REGS_LEN 1129 + return IXGBE_REGS_LEN * sizeof(u32); +} + +#define IXGBE_GET_STAT(_A_, _R_) (_A_->stats._R_) + + +static void ixgbe_get_regs(struct net_device *netdev, struct ethtool_regs *regs, + void *p) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_hw *hw = &adapter->hw; + u32 *regs_buff = p; + u8 i; + + printk(KERN_DEBUG "ixgbe_get_regs_1\n"); + memset(p, 0, IXGBE_REGS_LEN * sizeof(u32)); + printk(KERN_DEBUG "ixgbe_get_regs_2 0x%p\n", hw->hw_addr); + + regs->version = (1 << 24) | hw->revision_id << 16 | hw->device_id; + + /* General Registers */ + regs_buff[0] = IXGBE_READ_REG(hw, IXGBE_CTRL); + printk(KERN_DEBUG "ixgbe_get_regs_3\n"); + regs_buff[1] = IXGBE_READ_REG(hw, IXGBE_STATUS); + regs_buff[2] = IXGBE_READ_REG(hw, IXGBE_CTRL_EXT); + regs_buff[3] = IXGBE_READ_REG(hw, IXGBE_ESDP); + regs_buff[4] = IXGBE_READ_REG(hw, IXGBE_EODSDP); + regs_buff[5] = IXGBE_READ_REG(hw, IXGBE_LEDCTL); + regs_buff[6] = IXGBE_READ_REG(hw, IXGBE_FRTIMER); + regs_buff[7] = IXGBE_READ_REG(hw, IXGBE_TCPTIMER); + + printk(KERN_DEBUG "ixgbe_get_regs_4\n"); + + /* NVM Register */ + regs_buff[8] = IXGBE_READ_REG(hw, IXGBE_EEC); + regs_buff[9] = IXGBE_READ_REG(hw, IXGBE_EERD); + regs_buff[10] = IXGBE_READ_REG(hw, IXGBE_FLA); + regs_buff[11] = IXGBE_READ_REG(hw, IXGBE_EEMNGCTL); + regs_buff[12] = IXGBE_READ_REG(hw, IXGBE_EEMNGDATA); + regs_buff[13] = IXGBE_READ_REG(hw, IXGBE_FLMNGCTL); + regs_buff[14] = IXGBE_READ_REG(hw, IXGBE_FLMNGDATA); + regs_buff[15] = IXGBE_READ_REG(hw, IXGBE_FLMNGCNT); + regs_buff[16] = IXGBE_READ_REG(hw, IXGBE_FLOP); + regs_buff[17] = IXGBE_READ_REG(hw, IXGBE_GRC); + + /* Interrupt */ + /* don't read EICR because it can clear interrupt causes, instead + * read EICS which is a shadow but doesn't clear EICR */ + regs_buff[18] = IXGBE_READ_REG(hw, IXGBE_EICS); + regs_buff[19] = IXGBE_READ_REG(hw, IXGBE_EICS); + regs_buff[20] = IXGBE_READ_REG(hw, IXGBE_EIMS); + regs_buff[21] = IXGBE_READ_REG(hw, IXGBE_EIMC); + regs_buff[22] = IXGBE_READ_REG(hw, IXGBE_EIAC); + regs_buff[23] = IXGBE_READ_REG(hw, IXGBE_EIAM); + regs_buff[24] = IXGBE_READ_REG(hw, IXGBE_EITR(0)); + regs_buff[25] = IXGBE_READ_REG(hw, IXGBE_IVAR(0)); + regs_buff[26] = IXGBE_READ_REG(hw, IXGBE_MSIXT); + regs_buff[27] = IXGBE_READ_REG(hw, IXGBE_MSIXPBA); + regs_buff[28] = IXGBE_READ_REG(hw, IXGBE_PBACL(0)); + regs_buff[29] = IXGBE_READ_REG(hw, IXGBE_GPIE); + + /* Flow Control */ + regs_buff[30] = IXGBE_READ_REG(hw, IXGBE_PFCTOP); + regs_buff[31] = IXGBE_READ_REG(hw, IXGBE_FCTTV(0)); + regs_buff[32] = IXGBE_READ_REG(hw, IXGBE_FCTTV(1)); + regs_buff[33] = IXGBE_READ_REG(hw, IXGBE_FCTTV(2)); + regs_buff[34] = IXGBE_READ_REG(hw, IXGBE_FCTTV(3)); + for (i = 0; i < 8; i++) { + switch (hw->mac.type) { + case ixgbe_mac_82598EB: + regs_buff[35 + i] = IXGBE_READ_REG(hw, IXGBE_FCRTL(i)); + regs_buff[43 + i] = IXGBE_READ_REG(hw, IXGBE_FCRTH(i)); + break; + case ixgbe_mac_82599EB: + case ixgbe_mac_X540: + regs_buff[35 + i] = IXGBE_READ_REG(hw, + IXGBE_FCRTL_82599(i)); + regs_buff[43 + i] = IXGBE_READ_REG(hw, + IXGBE_FCRTH_82599(i)); + break; + default: + break; + } + } + regs_buff[51] = IXGBE_READ_REG(hw, IXGBE_FCRTV); + regs_buff[52] = IXGBE_READ_REG(hw, IXGBE_TFCS); + + /* Receive DMA */ + for (i = 0; i < 64; i++) + regs_buff[53 + i] = IXGBE_READ_REG(hw, IXGBE_RDBAL(i)); + for (i = 0; i < 64; i++) + regs_buff[117 + i] = IXGBE_READ_REG(hw, IXGBE_RDBAH(i)); + for (i = 0; i < 64; i++) + regs_buff[181 + i] = IXGBE_READ_REG(hw, IXGBE_RDLEN(i)); + for (i = 0; i < 64; i++) + regs_buff[245 + i] = IXGBE_READ_REG(hw, IXGBE_RDH(i)); + for (i = 0; i < 64; i++) + regs_buff[309 + i] = IXGBE_READ_REG(hw, IXGBE_RDT(i)); + for (i = 0; i < 64; i++) + regs_buff[373 + i] = IXGBE_READ_REG(hw, IXGBE_RXDCTL(i)); + for (i = 0; i < 16; i++) + regs_buff[437 + i] = IXGBE_READ_REG(hw, IXGBE_SRRCTL(i)); + for (i = 0; i < 16; i++) + regs_buff[453 + i] = IXGBE_READ_REG(hw, IXGBE_DCA_RXCTRL(i)); + regs_buff[469] = IXGBE_READ_REG(hw, IXGBE_RDRXCTL); + for (i = 0; i < 8; i++) + regs_buff[470 + i] = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(i)); + regs_buff[478] = IXGBE_READ_REG(hw, IXGBE_RXCTRL); + regs_buff[479] = IXGBE_READ_REG(hw, IXGBE_DROPEN); + + /* Receive */ + regs_buff[480] = IXGBE_READ_REG(hw, IXGBE_RXCSUM); + regs_buff[481] = IXGBE_READ_REG(hw, IXGBE_RFCTL); + for (i = 0; i < 16; i++) + regs_buff[482 + i] = IXGBE_READ_REG(hw, IXGBE_RAL(i)); + for (i = 0; i < 16; i++) + regs_buff[498 + i] = IXGBE_READ_REG(hw, IXGBE_RAH(i)); + regs_buff[514] = IXGBE_READ_REG(hw, IXGBE_PSRTYPE(0)); + regs_buff[515] = IXGBE_READ_REG(hw, IXGBE_FCTRL); + regs_buff[516] = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); + regs_buff[517] = IXGBE_READ_REG(hw, IXGBE_MCSTCTRL); + regs_buff[518] = IXGBE_READ_REG(hw, IXGBE_MRQC); + regs_buff[519] = IXGBE_READ_REG(hw, IXGBE_VMD_CTL); + for (i = 0; i < 8; i++) + regs_buff[520 + i] = IXGBE_READ_REG(hw, IXGBE_IMIR(i)); + for (i = 0; i < 8; i++) + regs_buff[528 + i] = IXGBE_READ_REG(hw, IXGBE_IMIREXT(i)); + regs_buff[536] = IXGBE_READ_REG(hw, IXGBE_IMIRVP); + + /* Transmit */ + for (i = 0; i < 32; i++) + regs_buff[537 + i] = IXGBE_READ_REG(hw, IXGBE_TDBAL(i)); + for (i = 0; i < 32; i++) + regs_buff[569 + i] = IXGBE_READ_REG(hw, IXGBE_TDBAH(i)); + for (i = 0; i < 32; i++) + regs_buff[601 + i] = IXGBE_READ_REG(hw, IXGBE_TDLEN(i)); + for (i = 0; i < 32; i++) + regs_buff[633 + i] = IXGBE_READ_REG(hw, IXGBE_TDH(i)); + for (i = 0; i < 32; i++) + regs_buff[665 + i] = IXGBE_READ_REG(hw, IXGBE_TDT(i)); + for (i = 0; i < 32; i++) + regs_buff[697 + i] = IXGBE_READ_REG(hw, IXGBE_TXDCTL(i)); + for (i = 0; i < 32; i++) + regs_buff[729 + i] = IXGBE_READ_REG(hw, IXGBE_TDWBAL(i)); + for (i = 0; i < 32; i++) + regs_buff[761 + i] = IXGBE_READ_REG(hw, IXGBE_TDWBAH(i)); + regs_buff[793] = IXGBE_READ_REG(hw, IXGBE_DTXCTL); + for (i = 0; i < 16; i++) + regs_buff[794 + i] = IXGBE_READ_REG(hw, IXGBE_DCA_TXCTRL(i)); + regs_buff[810] = IXGBE_READ_REG(hw, IXGBE_TIPG); + for (i = 0; i < 8; i++) + regs_buff[811 + i] = IXGBE_READ_REG(hw, IXGBE_TXPBSIZE(i)); + regs_buff[819] = IXGBE_READ_REG(hw, IXGBE_MNGTXMAP); + + /* Wake Up */ + regs_buff[820] = IXGBE_READ_REG(hw, IXGBE_WUC); + regs_buff[821] = IXGBE_READ_REG(hw, IXGBE_WUFC); + regs_buff[822] = IXGBE_READ_REG(hw, IXGBE_WUS); + regs_buff[823] = IXGBE_READ_REG(hw, IXGBE_IPAV); + regs_buff[824] = IXGBE_READ_REG(hw, IXGBE_IP4AT); + regs_buff[825] = IXGBE_READ_REG(hw, IXGBE_IP6AT); + regs_buff[826] = IXGBE_READ_REG(hw, IXGBE_WUPL); + regs_buff[827] = IXGBE_READ_REG(hw, IXGBE_WUPM); + regs_buff[828] = IXGBE_READ_REG(hw, IXGBE_FHFT(0)); + + /* DCB */ + regs_buff[829] = IXGBE_READ_REG(hw, IXGBE_RMCS); + regs_buff[830] = IXGBE_READ_REG(hw, IXGBE_DPMCS); + regs_buff[831] = IXGBE_READ_REG(hw, IXGBE_PDPMCS); + regs_buff[832] = IXGBE_READ_REG(hw, IXGBE_RUPPBMR); + for (i = 0; i < 8; i++) + regs_buff[833 + i] = IXGBE_READ_REG(hw, IXGBE_RT2CR(i)); + for (i = 0; i < 8; i++) + regs_buff[841 + i] = IXGBE_READ_REG(hw, IXGBE_RT2SR(i)); + for (i = 0; i < 8; i++) + regs_buff[849 + i] = IXGBE_READ_REG(hw, IXGBE_TDTQ2TCCR(i)); + for (i = 0; i < 8; i++) + regs_buff[857 + i] = IXGBE_READ_REG(hw, IXGBE_TDTQ2TCSR(i)); + for (i = 0; i < 8; i++) + regs_buff[865 + i] = IXGBE_READ_REG(hw, IXGBE_TDPT2TCCR(i)); + for (i = 0; i < 8; i++) + regs_buff[873 + i] = IXGBE_READ_REG(hw, IXGBE_TDPT2TCSR(i)); + + /* Statistics */ + regs_buff[881] = IXGBE_GET_STAT(adapter, crcerrs); + regs_buff[882] = IXGBE_GET_STAT(adapter, illerrc); + regs_buff[883] = IXGBE_GET_STAT(adapter, errbc); + regs_buff[884] = IXGBE_GET_STAT(adapter, mspdc); + for (i = 0; i < 8; i++) + regs_buff[885 + i] = IXGBE_GET_STAT(adapter, mpc[i]); + regs_buff[893] = IXGBE_GET_STAT(adapter, mlfc); + regs_buff[894] = IXGBE_GET_STAT(adapter, mrfc); + regs_buff[895] = IXGBE_GET_STAT(adapter, rlec); + regs_buff[896] = IXGBE_GET_STAT(adapter, lxontxc); + regs_buff[897] = IXGBE_GET_STAT(adapter, lxonrxc); + regs_buff[898] = IXGBE_GET_STAT(adapter, lxofftxc); + regs_buff[899] = IXGBE_GET_STAT(adapter, lxoffrxc); + for (i = 0; i < 8; i++) + regs_buff[900 + i] = IXGBE_GET_STAT(adapter, pxontxc[i]); + for (i = 0; i < 8; i++) + regs_buff[908 + i] = IXGBE_GET_STAT(adapter, pxonrxc[i]); + for (i = 0; i < 8; i++) + regs_buff[916 + i] = IXGBE_GET_STAT(adapter, pxofftxc[i]); + for (i = 0; i < 8; i++) + regs_buff[924 + i] = IXGBE_GET_STAT(adapter, pxoffrxc[i]); + regs_buff[932] = IXGBE_GET_STAT(adapter, prc64); + regs_buff[933] = IXGBE_GET_STAT(adapter, prc127); + regs_buff[934] = IXGBE_GET_STAT(adapter, prc255); + regs_buff[935] = IXGBE_GET_STAT(adapter, prc511); + regs_buff[936] = IXGBE_GET_STAT(adapter, prc1023); + regs_buff[937] = IXGBE_GET_STAT(adapter, prc1522); + regs_buff[938] = IXGBE_GET_STAT(adapter, gprc); + regs_buff[939] = IXGBE_GET_STAT(adapter, bprc); + regs_buff[940] = IXGBE_GET_STAT(adapter, mprc); + regs_buff[941] = IXGBE_GET_STAT(adapter, gptc); + regs_buff[942] = IXGBE_GET_STAT(adapter, gorc); + regs_buff[944] = IXGBE_GET_STAT(adapter, gotc); + for (i = 0; i < 8; i++) + regs_buff[946 + i] = IXGBE_GET_STAT(adapter, rnbc[i]); + regs_buff[954] = IXGBE_GET_STAT(adapter, ruc); + regs_buff[955] = IXGBE_GET_STAT(adapter, rfc); + regs_buff[956] = IXGBE_GET_STAT(adapter, roc); + regs_buff[957] = IXGBE_GET_STAT(adapter, rjc); + regs_buff[958] = IXGBE_GET_STAT(adapter, mngprc); + regs_buff[959] = IXGBE_GET_STAT(adapter, mngpdc); + regs_buff[960] = IXGBE_GET_STAT(adapter, mngptc); + regs_buff[961] = IXGBE_GET_STAT(adapter, tor); + regs_buff[963] = IXGBE_GET_STAT(adapter, tpr); + regs_buff[964] = IXGBE_GET_STAT(adapter, tpt); + regs_buff[965] = IXGBE_GET_STAT(adapter, ptc64); + regs_buff[966] = IXGBE_GET_STAT(adapter, ptc127); + regs_buff[967] = IXGBE_GET_STAT(adapter, ptc255); + regs_buff[968] = IXGBE_GET_STAT(adapter, ptc511); + regs_buff[969] = IXGBE_GET_STAT(adapter, ptc1023); + regs_buff[970] = IXGBE_GET_STAT(adapter, ptc1522); + regs_buff[971] = IXGBE_GET_STAT(adapter, mptc); + regs_buff[972] = IXGBE_GET_STAT(adapter, bptc); + regs_buff[973] = IXGBE_GET_STAT(adapter, xec); + for (i = 0; i < 16; i++) + regs_buff[974 + i] = IXGBE_GET_STAT(adapter, qprc[i]); + for (i = 0; i < 16; i++) + regs_buff[990 + i] = IXGBE_GET_STAT(adapter, qptc[i]); + for (i = 0; i < 16; i++) + regs_buff[1006 + i] = IXGBE_GET_STAT(adapter, qbrc[i]); + for (i = 0; i < 16; i++) + regs_buff[1022 + i] = IXGBE_GET_STAT(adapter, qbtc[i]); + + /* MAC */ + regs_buff[1038] = IXGBE_READ_REG(hw, IXGBE_PCS1GCFIG); + regs_buff[1039] = IXGBE_READ_REG(hw, IXGBE_PCS1GLCTL); + regs_buff[1040] = IXGBE_READ_REG(hw, IXGBE_PCS1GLSTA); + regs_buff[1041] = IXGBE_READ_REG(hw, IXGBE_PCS1GDBG0); + regs_buff[1042] = IXGBE_READ_REG(hw, IXGBE_PCS1GDBG1); + regs_buff[1043] = IXGBE_READ_REG(hw, IXGBE_PCS1GANA); + regs_buff[1044] = IXGBE_READ_REG(hw, IXGBE_PCS1GANLP); + regs_buff[1045] = IXGBE_READ_REG(hw, IXGBE_PCS1GANNP); + regs_buff[1046] = IXGBE_READ_REG(hw, IXGBE_PCS1GANLPNP); + regs_buff[1047] = IXGBE_READ_REG(hw, IXGBE_HLREG0); + regs_buff[1048] = IXGBE_READ_REG(hw, IXGBE_HLREG1); + regs_buff[1049] = IXGBE_READ_REG(hw, IXGBE_PAP); + regs_buff[1050] = IXGBE_READ_REG(hw, IXGBE_MACA); + regs_buff[1051] = IXGBE_READ_REG(hw, IXGBE_APAE); + regs_buff[1052] = IXGBE_READ_REG(hw, IXGBE_ARD); + regs_buff[1053] = IXGBE_READ_REG(hw, IXGBE_AIS); + regs_buff[1054] = IXGBE_READ_REG(hw, IXGBE_MSCA); + regs_buff[1055] = IXGBE_READ_REG(hw, IXGBE_MSRWD); + regs_buff[1056] = IXGBE_READ_REG(hw, IXGBE_MLADD); + regs_buff[1057] = IXGBE_READ_REG(hw, IXGBE_MHADD); + regs_buff[1058] = IXGBE_READ_REG(hw, IXGBE_TREG); + regs_buff[1059] = IXGBE_READ_REG(hw, IXGBE_PCSS1); + regs_buff[1060] = IXGBE_READ_REG(hw, IXGBE_PCSS2); + regs_buff[1061] = IXGBE_READ_REG(hw, IXGBE_XPCSS); + regs_buff[1062] = IXGBE_READ_REG(hw, IXGBE_SERDESC); + regs_buff[1063] = IXGBE_READ_REG(hw, IXGBE_MACS); + regs_buff[1064] = IXGBE_READ_REG(hw, IXGBE_AUTOC); + regs_buff[1065] = IXGBE_READ_REG(hw, IXGBE_LINKS); + regs_buff[1066] = IXGBE_READ_REG(hw, IXGBE_AUTOC2); + regs_buff[1067] = IXGBE_READ_REG(hw, IXGBE_AUTOC3); + regs_buff[1068] = IXGBE_READ_REG(hw, IXGBE_ANLP1); + regs_buff[1069] = IXGBE_READ_REG(hw, IXGBE_ANLP2); + regs_buff[1070] = IXGBE_READ_REG(hw, IXGBE_ATLASCTL); + + /* Diagnostic */ + regs_buff[1071] = IXGBE_READ_REG(hw, IXGBE_RDSTATCTL); + for (i = 0; i < 8; i++) + regs_buff[1072 + i] = IXGBE_READ_REG(hw, IXGBE_RDSTAT(i)); + regs_buff[1080] = IXGBE_READ_REG(hw, IXGBE_RDHMPN); + for (i = 0; i < 4; i++) + regs_buff[1081 + i] = IXGBE_READ_REG(hw, IXGBE_RIC_DW(i)); + regs_buff[1085] = IXGBE_READ_REG(hw, IXGBE_RDPROBE); + regs_buff[1086] = IXGBE_READ_REG(hw, IXGBE_TDSTATCTL); + for (i = 0; i < 8; i++) + regs_buff[1087 + i] = IXGBE_READ_REG(hw, IXGBE_TDSTAT(i)); + regs_buff[1095] = IXGBE_READ_REG(hw, IXGBE_TDHMPN); + for (i = 0; i < 4; i++) + regs_buff[1096 + i] = IXGBE_READ_REG(hw, IXGBE_TIC_DW(i)); + regs_buff[1100] = IXGBE_READ_REG(hw, IXGBE_TDPROBE); + regs_buff[1101] = IXGBE_READ_REG(hw, IXGBE_TXBUFCTRL); + regs_buff[1102] = IXGBE_READ_REG(hw, IXGBE_TXBUFDATA0); + regs_buff[1103] = IXGBE_READ_REG(hw, IXGBE_TXBUFDATA1); + regs_buff[1104] = IXGBE_READ_REG(hw, IXGBE_TXBUFDATA2); + regs_buff[1105] = IXGBE_READ_REG(hw, IXGBE_TXBUFDATA3); + regs_buff[1106] = IXGBE_READ_REG(hw, IXGBE_RXBUFCTRL); + regs_buff[1107] = IXGBE_READ_REG(hw, IXGBE_RXBUFDATA0); + regs_buff[1108] = IXGBE_READ_REG(hw, IXGBE_RXBUFDATA1); + regs_buff[1109] = IXGBE_READ_REG(hw, IXGBE_RXBUFDATA2); + regs_buff[1110] = IXGBE_READ_REG(hw, IXGBE_RXBUFDATA3); + for (i = 0; i < 8; i++) + regs_buff[1111 + i] = IXGBE_READ_REG(hw, IXGBE_PCIE_DIAG(i)); + regs_buff[1119] = IXGBE_READ_REG(hw, IXGBE_RFVAL); + regs_buff[1120] = IXGBE_READ_REG(hw, IXGBE_MDFTC1); + regs_buff[1121] = IXGBE_READ_REG(hw, IXGBE_MDFTC2); + regs_buff[1122] = IXGBE_READ_REG(hw, IXGBE_MDFTFIFO1); + regs_buff[1123] = IXGBE_READ_REG(hw, IXGBE_MDFTFIFO2); + regs_buff[1124] = IXGBE_READ_REG(hw, IXGBE_MDFTS); + regs_buff[1125] = IXGBE_READ_REG(hw, IXGBE_PCIEECCCTL); + regs_buff[1126] = IXGBE_READ_REG(hw, IXGBE_PBTXECC); + regs_buff[1127] = IXGBE_READ_REG(hw, IXGBE_PBRXECC); + + /* 82599 X540 specific registers */ + regs_buff[1128] = IXGBE_READ_REG(hw, IXGBE_MFLCN); +} + +static int ixgbe_get_eeprom_len(struct net_device *netdev) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + return adapter->hw.eeprom.word_size * 2; +} + +static int ixgbe_get_eeprom(struct net_device *netdev, + struct ethtool_eeprom *eeprom, u8 *bytes) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_hw *hw = &adapter->hw; + u16 *eeprom_buff; + int first_word, last_word, eeprom_len; + int ret_val = 0; + u16 i; + + if (eeprom->len == 0) + return -EINVAL; + + eeprom->magic = hw->vendor_id | (hw->device_id << 16); + + first_word = eeprom->offset >> 1; + last_word = (eeprom->offset + eeprom->len - 1) >> 1; + eeprom_len = last_word - first_word + 1; + + eeprom_buff = kmalloc(sizeof(u16) * eeprom_len, GFP_KERNEL); + if (!eeprom_buff) + return -ENOMEM; + + ret_val = ixgbe_read_eeprom_buffer(hw, first_word, eeprom_len, + eeprom_buff); + + /* Device's eeprom is always little-endian, word addressable */ + for (i = 0; i < eeprom_len; i++) + le16_to_cpus(&eeprom_buff[i]); + + memcpy(bytes, (u8 *)eeprom_buff + (eeprom->offset & 1), eeprom->len); + kfree(eeprom_buff); + + return ret_val; +} + +static int ixgbe_set_eeprom(struct net_device *netdev, + struct ethtool_eeprom *eeprom, u8 *bytes) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_hw *hw = &adapter->hw; + u16 *eeprom_buff; + void *ptr; + int max_len, first_word, last_word, ret_val = 0; + u16 i; + + if (eeprom->len == 0) + return -EINVAL; + + if (eeprom->magic != (hw->vendor_id | (hw->device_id << 16))) + return -EINVAL; + + max_len = hw->eeprom.word_size * 2; + + first_word = eeprom->offset >> 1; + last_word = (eeprom->offset + eeprom->len - 1) >> 1; + eeprom_buff = kmalloc(max_len, GFP_KERNEL); + if (!eeprom_buff) + return -ENOMEM; + + ptr = eeprom_buff; + + if (eeprom->offset & 1) { + /* + * need read/modify/write of first changed EEPROM word + * only the second byte of the word is being modified + */ + ret_val = ixgbe_read_eeprom(hw, first_word, &eeprom_buff[0]); + if (ret_val) + goto err; + + ptr++; + } + if (((eeprom->offset + eeprom->len) & 1) && (ret_val == 0)) { + /* + * need read/modify/write of last changed EEPROM word + * only the first byte of the word is being modified + */ + ret_val = ixgbe_read_eeprom(hw, last_word, + &eeprom_buff[last_word - first_word]); + if (ret_val) + goto err; + } + + /* Device's eeprom is always little-endian, word addressable */ + for (i = 0; i < last_word - first_word + 1; i++) + le16_to_cpus(&eeprom_buff[i]); + + memcpy(ptr, bytes, eeprom->len); + + for (i = 0; i < last_word - first_word + 1; i++) + cpu_to_le16s(&eeprom_buff[i]); + + ret_val = ixgbe_write_eeprom_buffer(hw, first_word, + last_word - first_word + 1, + eeprom_buff); + + /* Update the checksum */ + if (ret_val == 0) + ixgbe_update_eeprom_checksum(hw); + +err: + kfree(eeprom_buff); + return ret_val; +} + +static void ixgbe_get_drvinfo(struct net_device *netdev, + struct ethtool_drvinfo *drvinfo) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + strlcpy(drvinfo->driver, ixgbe_driver_name, sizeof(drvinfo->driver)); + + strlcpy(drvinfo->version, ixgbe_driver_version, + sizeof(drvinfo->version)); + + strlcpy(drvinfo->fw_version, adapter->eeprom_id, + sizeof(drvinfo->fw_version)); + + strlcpy(drvinfo->bus_info, pci_name(adapter->pdev), + sizeof(drvinfo->bus_info)); + + drvinfo->n_stats = IXGBE_STATS_LEN; + drvinfo->testinfo_len = IXGBE_TEST_LEN; + drvinfo->regdump_len = ixgbe_get_regs_len(netdev); +} + +static void ixgbe_get_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + ring->rx_max_pending = IXGBE_MAX_RXD; + ring->tx_max_pending = IXGBE_MAX_TXD; + ring->rx_mini_max_pending = 0; + ring->rx_jumbo_max_pending = 0; + ring->rx_pending = adapter->rx_ring_count; + ring->tx_pending = adapter->tx_ring_count; + ring->rx_mini_pending = 0; + ring->rx_jumbo_pending = 0; +} + +static int ixgbe_set_ringparam(struct net_device *netdev, + struct ethtool_ringparam *ring) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_ring *tx_ring = NULL, *rx_ring = NULL; + u32 new_rx_count, new_tx_count; + int i, err = 0; + + if ((ring->rx_mini_pending) || (ring->rx_jumbo_pending)) + return -EINVAL; + + new_tx_count = clamp_t(u32, ring->tx_pending, + IXGBE_MIN_TXD, IXGBE_MAX_TXD); + new_tx_count = ALIGN(new_tx_count, IXGBE_REQ_TX_DESCRIPTOR_MULTIPLE); + + new_rx_count = clamp_t(u32, ring->rx_pending, + IXGBE_MIN_RXD, IXGBE_MAX_RXD); + new_rx_count = ALIGN(new_rx_count, IXGBE_REQ_RX_DESCRIPTOR_MULTIPLE); + + /* if nothing to do return success */ + if ((new_tx_count == adapter->tx_ring_count) && + (new_rx_count == adapter->rx_ring_count)) + return 0; + + while (test_and_set_bit(__IXGBE_RESETTING, &adapter->state)) + usleep_range(1000, 2000); + + if (!netif_running(adapter->netdev)) { + for (i = 0; i < adapter->num_tx_queues; i++) + adapter->tx_ring[i]->count = new_tx_count; + for (i = 0; i < adapter->num_rx_queues; i++) + adapter->rx_ring[i]->count = new_rx_count; + adapter->tx_ring_count = new_tx_count; + adapter->rx_ring_count = new_rx_count; + goto clear_reset; + } + + /* alloc updated Tx resources */ + if (new_tx_count != adapter->tx_ring_count) { + tx_ring = vmalloc(adapter->num_tx_queues * sizeof(*tx_ring)); + if (!tx_ring) { + err = -ENOMEM; + goto clear_reset; + } + + for (i = 0; i < adapter->num_tx_queues; i++) { + /* clone ring and setup updated count */ + tx_ring[i] = *adapter->tx_ring[i]; + tx_ring[i].count = new_tx_count; + err = ixgbe_setup_tx_resources(&tx_ring[i]); + if (err) { + while (i) { + i--; + ixgbe_free_tx_resources(&tx_ring[i]); + } + + vfree(tx_ring); + tx_ring = NULL; + + goto clear_reset; + } + } + } + + /* alloc updated Rx resources */ + if (new_rx_count != adapter->rx_ring_count) { + rx_ring = vmalloc(adapter->num_rx_queues * sizeof(*rx_ring)); + if (!rx_ring) { + err = -ENOMEM; + goto clear_reset; + } + + for (i = 0; i < adapter->num_rx_queues; i++) { + /* clone ring and setup updated count */ + rx_ring[i] = *adapter->rx_ring[i]; + rx_ring[i].count = new_rx_count; + err = ixgbe_setup_rx_resources(&rx_ring[i]); + if (err) { + while (i) { + i--; + ixgbe_free_rx_resources(&rx_ring[i]); + } + + vfree(rx_ring); + rx_ring = NULL; + + goto clear_reset; + } + } + } + + /* bring interface down to prepare for update */ + ixgbe_down(adapter); + + /* Tx */ + if (tx_ring) { + for (i = 0; i < adapter->num_tx_queues; i++) { + ixgbe_free_tx_resources(adapter->tx_ring[i]); + *adapter->tx_ring[i] = tx_ring[i]; + } + adapter->tx_ring_count = new_tx_count; + + vfree(tx_ring); + tx_ring = NULL; + } + + /* Rx */ + if (rx_ring) { + for (i = 0; i < adapter->num_rx_queues; i++) { + ixgbe_free_rx_resources(adapter->rx_ring[i]); + *adapter->rx_ring[i] = rx_ring[i]; + } + adapter->rx_ring_count = new_rx_count; + + vfree(rx_ring); + rx_ring = NULL; + } + + /* restore interface using new values */ + ixgbe_up(adapter); + +clear_reset: + /* free Tx resources if Rx error is encountered */ + if (tx_ring) { + for (i = 0; i < adapter->num_tx_queues; i++) + ixgbe_free_tx_resources(&tx_ring[i]); + vfree(tx_ring); + } + + clear_bit(__IXGBE_RESETTING, &adapter->state); + return err; +} + +#ifndef HAVE_ETHTOOL_GET_SSET_COUNT +static int ixgbe_get_stats_count(struct net_device *netdev) +{ + return IXGBE_STATS_LEN; +} + +#else /* HAVE_ETHTOOL_GET_SSET_COUNT */ +static int ixgbe_get_sset_count(struct net_device *netdev, int sset) +{ + switch (sset) { + case ETH_SS_TEST: + return IXGBE_TEST_LEN; + case ETH_SS_STATS: + return IXGBE_STATS_LEN; + default: + return -EOPNOTSUPP; + } +} + +#endif /* HAVE_ETHTOOL_GET_SSET_COUNT */ +static void ixgbe_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats *stats, u64 *data) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); +#ifdef HAVE_NETDEV_STATS_IN_NETDEV + struct net_device_stats *net_stats = &netdev->stats; +#else + struct net_device_stats *net_stats = &adapter->net_stats; +#endif + u64 *queue_stat; + int stat_count = sizeof(struct ixgbe_queue_stats) / sizeof(u64); + int i, j, k; + char *p; + + printk(KERN_DEBUG "ixgbe_stats 0\n"); + ixgbe_update_stats(adapter); + printk(KERN_DEBUG "ixgbe_stats 1\n"); + + for (i = 0; i < IXGBE_NETDEV_STATS_LEN; i++) { + p = (char *)net_stats + ixgbe_gstrings_net_stats[i].stat_offset; + data[i] = (ixgbe_gstrings_net_stats[i].sizeof_stat == + sizeof(u64)) ? *(u64 *)p : *(u32 *)p; + } + for (j = 0; j < IXGBE_GLOBAL_STATS_LEN; j++, i++) { + p = (char *)adapter + ixgbe_gstrings_stats[j].stat_offset; + data[i] = (ixgbe_gstrings_stats[j].sizeof_stat == + sizeof(u64)) ? *(u64 *)p : *(u32 *)p; + } + printk(KERN_DEBUG "ixgbe_stats 2\n"); +#ifdef NO_VNIC + for (j = 0; j < adapter->num_tx_queues; j++) { + queue_stat = (u64 *)&adapter->tx_ring[j]->stats; + for (k = 0; k < stat_count; k++) + data[i + k] = queue_stat[k]; + i += k; + } + for (j = 0; j < adapter->num_rx_queues; j++) { + queue_stat = (u64 *)&adapter->rx_ring[j]->stats; + for (k = 0; k < stat_count; k++) + data[i + k] = queue_stat[k]; + i += k; + } + printk(KERN_DEBUG "ixgbe_stats 3\n"); +#endif + if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) { + for (j = 0; j < MAX_TX_PACKET_BUFFERS; j++) { + data[i++] = adapter->stats.pxontxc[j]; + data[i++] = adapter->stats.pxofftxc[j]; + } + for (j = 0; j < MAX_RX_PACKET_BUFFERS; j++) { + data[i++] = adapter->stats.pxonrxc[j]; + data[i++] = adapter->stats.pxoffrxc[j]; + } + } + printk(KERN_DEBUG "ixgbe_stats 4\n"); + stat_count = sizeof(struct vf_stats) / sizeof(u64); + for (j = 0; j < adapter->num_vfs; j++) { + queue_stat = (u64 *)&adapter->vfinfo[j].vfstats; + for (k = 0; k < stat_count; k++) + data[i + k] = queue_stat[k]; + queue_stat = (u64 *)&adapter->vfinfo[j].saved_rst_vfstats; + for (k = 0; k < stat_count; k++) + data[i + k] += queue_stat[k]; + i += k; + } +} + +static void ixgbe_get_strings(struct net_device *netdev, u32 stringset, + u8 *data) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + char *p = (char *)data; + int i; + + switch (stringset) { + case ETH_SS_TEST: + memcpy(data, *ixgbe_gstrings_test, + IXGBE_TEST_LEN * ETH_GSTRING_LEN); + break; + case ETH_SS_STATS: + for (i = 0; i < IXGBE_NETDEV_STATS_LEN; i++) { + memcpy(p, ixgbe_gstrings_net_stats[i].stat_string, + ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } + for (i = 0; i < IXGBE_GLOBAL_STATS_LEN; i++) { + memcpy(p, ixgbe_gstrings_stats[i].stat_string, + ETH_GSTRING_LEN); + p += ETH_GSTRING_LEN; + } + for (i = 0; i < adapter->num_tx_queues; i++) { + sprintf(p, "tx_queue_%u_packets", i); + p += ETH_GSTRING_LEN; + sprintf(p, "tx_queue_%u_bytes", i); + p += ETH_GSTRING_LEN; + } + for (i = 0; i < adapter->num_rx_queues; i++) { + sprintf(p, "rx_queue_%u_packets", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_queue_%u_bytes", i); + p += ETH_GSTRING_LEN; + } + if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) { + for (i = 0; i < MAX_TX_PACKET_BUFFERS; i++) { + sprintf(p, "tx_pb_%u_pxon", i); + p += ETH_GSTRING_LEN; + sprintf(p, "tx_pb_%u_pxoff", i); + p += ETH_GSTRING_LEN; + } + for (i = 0; i < MAX_RX_PACKET_BUFFERS; i++) { + sprintf(p, "rx_pb_%u_pxon", i); + p += ETH_GSTRING_LEN; + sprintf(p, "rx_pb_%u_pxoff", i); + p += ETH_GSTRING_LEN; + } + } + for (i = 0; i < adapter->num_vfs; i++) { + sprintf(p, "VF %d Rx Packets", i); + p += ETH_GSTRING_LEN; + sprintf(p, "VF %d Rx Bytes", i); + p += ETH_GSTRING_LEN; + sprintf(p, "VF %d Tx Packets", i); + p += ETH_GSTRING_LEN; + sprintf(p, "VF %d Tx Bytes", i); + p += ETH_GSTRING_LEN; + sprintf(p, "VF %d MC Packets", i); + p += ETH_GSTRING_LEN; + } + /* BUG_ON(p - data != IXGBE_STATS_LEN * ETH_GSTRING_LEN); */ + break; + } +} + +static int ixgbe_link_test(struct ixgbe_adapter *adapter, u64 *data) +{ + struct ixgbe_hw *hw = &adapter->hw; + bool link_up; + u32 link_speed = 0; + *data = 0; + + hw->mac.ops.check_link(hw, &link_speed, &link_up, true); + if (link_up) + return *data; + else + *data = 1; + return *data; +} + +/* ethtool register test data */ +struct ixgbe_reg_test { + u16 reg; + u8 array_len; + u8 test_type; + u32 mask; + u32 write; +}; + +/* In the hardware, registers are laid out either singly, in arrays + * spaced 0x40 bytes apart, or in contiguous tables. We assume + * most tests take place on arrays or single registers (handled + * as a single-element array) and special-case the tables. + * Table tests are always pattern tests. + * + * We also make provision for some required setup steps by specifying + * registers to be written without any read-back testing. + */ + +#define PATTERN_TEST 1 +#define SET_READ_TEST 2 +#define WRITE_NO_TEST 3 +#define TABLE32_TEST 4 +#define TABLE64_TEST_LO 5 +#define TABLE64_TEST_HI 6 + +/* default 82599 register test */ +static struct ixgbe_reg_test reg_test_82599[] = { + { IXGBE_FCRTL_82599(0), 1, PATTERN_TEST, 0x8007FFF0, 0x8007FFF0 }, + { IXGBE_FCRTH_82599(0), 1, PATTERN_TEST, 0x8007FFF0, 0x8007FFF0 }, + { IXGBE_PFCTOP, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { IXGBE_VLNCTRL, 1, PATTERN_TEST, 0x00000000, 0x00000000 }, + { IXGBE_RDBAL(0), 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFF80 }, + { IXGBE_RDBAH(0), 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { IXGBE_RDLEN(0), 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF }, + { IXGBE_RXDCTL(0), 4, WRITE_NO_TEST, 0, IXGBE_RXDCTL_ENABLE }, + { IXGBE_RDT(0), 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, + { IXGBE_RXDCTL(0), 4, WRITE_NO_TEST, 0, 0 }, + { IXGBE_FCRTH(0), 1, PATTERN_TEST, 0x8007FFF0, 0x8007FFF0 }, + { IXGBE_FCTTV(0), 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { IXGBE_TDBAL(0), 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, + { IXGBE_TDBAH(0), 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { IXGBE_TDLEN(0), 4, PATTERN_TEST, 0x000FFF80, 0x000FFF80 }, + { IXGBE_RXCTRL, 1, SET_READ_TEST, 0x00000001, 0x00000001 }, + { IXGBE_RAL(0), 16, TABLE64_TEST_LO, 0xFFFFFFFF, 0xFFFFFFFF }, + { IXGBE_RAL(0), 16, TABLE64_TEST_HI, 0x8001FFFF, 0x800CFFFF }, + { IXGBE_MTA(0), 128, TABLE32_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { 0, 0, 0, 0 } +}; + +/* default 82598 register test */ +static struct ixgbe_reg_test reg_test_82598[] = { + { IXGBE_FCRTL(0), 1, PATTERN_TEST, 0x8007FFF0, 0x8007FFF0 }, + { IXGBE_FCRTH(0), 1, PATTERN_TEST, 0x8007FFF0, 0x8007FFF0 }, + { IXGBE_PFCTOP, 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { IXGBE_VLNCTRL, 1, PATTERN_TEST, 0x00000000, 0x00000000 }, + { IXGBE_RDBAL(0), 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, + { IXGBE_RDBAH(0), 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { IXGBE_RDLEN(0), 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF }, + /* Enable all four RX queues before testing. */ + { IXGBE_RXDCTL(0), 4, WRITE_NO_TEST, 0, IXGBE_RXDCTL_ENABLE }, + /* RDH is read-only for 82598, only test RDT. */ + { IXGBE_RDT(0), 4, PATTERN_TEST, 0x0000FFFF, 0x0000FFFF }, + { IXGBE_RXDCTL(0), 4, WRITE_NO_TEST, 0, 0 }, + { IXGBE_FCRTH(0), 1, PATTERN_TEST, 0x8007FFF0, 0x8007FFF0 }, + { IXGBE_FCTTV(0), 1, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { IXGBE_TIPG, 1, PATTERN_TEST, 0x000000FF, 0x000000FF }, + { IXGBE_TDBAL(0), 4, PATTERN_TEST, 0xFFFFFF80, 0xFFFFFFFF }, + { IXGBE_TDBAH(0), 4, PATTERN_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { IXGBE_TDLEN(0), 4, PATTERN_TEST, 0x000FFF80, 0x000FFFFF }, + { IXGBE_RXCTRL, 1, SET_READ_TEST, 0x00000003, 0x00000003 }, + { IXGBE_DTXCTL, 1, SET_READ_TEST, 0x00000005, 0x00000005 }, + { IXGBE_RAL(0), 16, TABLE64_TEST_LO, 0xFFFFFFFF, 0xFFFFFFFF }, + { IXGBE_RAL(0), 16, TABLE64_TEST_HI, 0x800CFFFF, 0x800CFFFF }, + { IXGBE_MTA(0), 128, TABLE32_TEST, 0xFFFFFFFF, 0xFFFFFFFF }, + { 0, 0, 0, 0 } +}; + +#define REG_PATTERN_TEST(R, M, W) \ +{ \ + u32 pat, val, before; \ + const u32 _test[] = {0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF}; \ + for (pat = 0; pat < ARRAY_SIZE(_test); pat++) { \ + before = readl(adapter->hw.hw_addr + R); \ + writel((_test[pat] & W), (adapter->hw.hw_addr + R)); \ + val = readl(adapter->hw.hw_addr + R); \ + if (val != (_test[pat] & W & M)) { \ + e_err(drv, "pattern test reg %04X failed: got " \ + "0x%08X expected 0x%08X\n", \ + R, val, (_test[pat] & W & M)); \ + *data = R; \ + writel(before, adapter->hw.hw_addr + R); \ + return 1; \ + } \ + writel(before, adapter->hw.hw_addr + R); \ + } \ +} + +#define REG_SET_AND_CHECK(R, M, W) \ +{ \ + u32 val, before; \ + before = readl(adapter->hw.hw_addr + R); \ + writel((W & M), (adapter->hw.hw_addr + R)); \ + val = readl(adapter->hw.hw_addr + R); \ + if ((W & M) != (val & M)) { \ + e_err(drv, "set/check reg %04X test failed: got 0x%08X " \ + "expected 0x%08X\n", R, (val & M), (W & M)); \ + *data = R; \ + writel(before, (adapter->hw.hw_addr + R)); \ + return 1; \ + } \ + writel(before, (adapter->hw.hw_addr + R)); \ +} + +static int ixgbe_reg_test(struct ixgbe_adapter *adapter, u64 *data) +{ + struct ixgbe_reg_test *test; + u32 value, status_before, status_after; + u32 i, toggle; + + switch (adapter->hw.mac.type) { + case ixgbe_mac_82598EB: + toggle = 0x7FFFF3FF; + test = reg_test_82598; + break; + case ixgbe_mac_82599EB: + case ixgbe_mac_X540: + toggle = 0x7FFFF30F; + test = reg_test_82599; + break; + default: + *data = 1; + return 1; + break; + } + + /* + * Because the status register is such a special case, + * we handle it separately from the rest of the register + * tests. Some bits are read-only, some toggle, and some + * are writeable on newer MACs. + */ + status_before = IXGBE_READ_REG(&adapter->hw, IXGBE_STATUS); + value = (IXGBE_READ_REG(&adapter->hw, IXGBE_STATUS) & toggle); + IXGBE_WRITE_REG(&adapter->hw, IXGBE_STATUS, toggle); + status_after = IXGBE_READ_REG(&adapter->hw, IXGBE_STATUS) & toggle; + if (value != status_after) { + e_err(drv, "failed STATUS register test got: " + "0x%08X expected: 0x%08X\n", status_after, value); + *data = 1; + return 1; + } + /* restore previous status */ + IXGBE_WRITE_REG(&adapter->hw, IXGBE_STATUS, status_before); + + /* + * Perform the remainder of the register test, looping through + * the test table until we either fail or reach the null entry. + */ + while (test->reg) { + for (i = 0; i < test->array_len; i++) { + switch (test->test_type) { + case PATTERN_TEST: + REG_PATTERN_TEST(test->reg + (i * 0x40), + test->mask, + test->write); + break; + case SET_READ_TEST: + REG_SET_AND_CHECK(test->reg + (i * 0x40), + test->mask, + test->write); + break; + case WRITE_NO_TEST: + writel(test->write, + (adapter->hw.hw_addr + test->reg) + + (i * 0x40)); + break; + case TABLE32_TEST: + REG_PATTERN_TEST(test->reg + (i * 4), + test->mask, + test->write); + break; + case TABLE64_TEST_LO: + REG_PATTERN_TEST(test->reg + (i * 8), + test->mask, + test->write); + break; + case TABLE64_TEST_HI: + REG_PATTERN_TEST((test->reg + 4) + (i * 8), + test->mask, + test->write); + break; + } + } + test++; + } + + *data = 0; + return 0; +} + +static int ixgbe_eeprom_test(struct ixgbe_adapter *adapter, u64 *data) +{ + if (ixgbe_validate_eeprom_checksum(&adapter->hw, NULL)) + *data = 1; + else + *data = 0; + return *data; +} + +static irqreturn_t ixgbe_test_intr(int irq, void *data) +{ + struct net_device *netdev = (struct net_device *) data; + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + adapter->test_icr |= IXGBE_READ_REG(&adapter->hw, IXGBE_EICR); + + return IRQ_HANDLED; +} + +static int ixgbe_intr_test(struct ixgbe_adapter *adapter, u64 *data) +{ + struct net_device *netdev = adapter->netdev; + u32 mask, i = 0, shared_int = true; + u32 irq = adapter->pdev->irq; + + *data = 0; + + /* Hook up test interrupt handler just for this test */ + if (adapter->msix_entries) { + /* NOTE: we don't test MSI-X interrupts here, yet */ + return 0; + } else if (adapter->flags & IXGBE_FLAG_MSI_ENABLED) { + shared_int = false; + if (request_irq(irq, &ixgbe_test_intr, 0, netdev->name, + netdev)) { + *data = 1; + return -1; + } + } else if (!request_irq(irq, &ixgbe_test_intr, IRQF_PROBE_SHARED, + netdev->name, netdev)) { + shared_int = false; + } else if (request_irq(irq, &ixgbe_test_intr, IRQF_SHARED, + netdev->name, netdev)) { + *data = 1; + return -1; + } + e_info(hw, "testing %s interrupt\n", + (shared_int ? "shared" : "unshared")); + + /* Disable all the interrupts */ + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFFFFFF); + IXGBE_WRITE_FLUSH(&adapter->hw); + usleep_range(10000, 20000); + + /* Test each interrupt */ + for (; i < 10; i++) { + /* Interrupt to test */ + mask = 1 << i; + + if (!shared_int) { + /* + * Disable the interrupts to be reported in + * the cause register and then force the same + * interrupt and see if one gets posted. If + * an interrupt was posted to the bus, the + * test failed. + */ + adapter->test_icr = 0; + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, + ~mask & 0x00007FFF); + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, + ~mask & 0x00007FFF); + IXGBE_WRITE_FLUSH(&adapter->hw); + usleep_range(10000, 20000); + + if (adapter->test_icr & mask) { + *data = 3; + break; + } + } + + /* + * Enable the interrupt to be reported in the cause + * register and then force the same interrupt and see + * if one gets posted. If an interrupt was not posted + * to the bus, the test failed. + */ + adapter->test_icr = 0; + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMS, mask); + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, mask); + IXGBE_WRITE_FLUSH(&adapter->hw); + usleep_range(10000, 20000); + + if (!(adapter->test_icr & mask)) { + *data = 4; + break; + } + + if (!shared_int) { + /* + * Disable the other interrupts to be reported in + * the cause register and then force the other + * interrupts and see if any get posted. If + * an interrupt was posted to the bus, the + * test failed. + */ + adapter->test_icr = 0; + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, + ~mask & 0x00007FFF); + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EICS, + ~mask & 0x00007FFF); + IXGBE_WRITE_FLUSH(&adapter->hw); + usleep_range(10000, 20000); + + if (adapter->test_icr) { + *data = 5; + break; + } + } + } + + /* Disable all the interrupts */ + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EIMC, 0xFFFFFFFF); + IXGBE_WRITE_FLUSH(&adapter->hw); + usleep_range(10000, 20000); + + /* Unhook test interrupt handler */ + free_irq(irq, netdev); + + return *data; +} + + + +static int ixgbe_setup_loopback_test(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + u32 reg_data; + + /* X540 needs to set the MACC.FLU bit to force link up */ + if (adapter->hw.mac.type == ixgbe_mac_X540) { + reg_data = IXGBE_READ_REG(hw, IXGBE_MACC); + reg_data |= IXGBE_MACC_FLU; + IXGBE_WRITE_REG(hw, IXGBE_MACC, reg_data); + } + + /* right now we only support MAC loopback in the driver */ + reg_data = IXGBE_READ_REG(hw, IXGBE_HLREG0); + /* Setup MAC loopback */ + reg_data |= IXGBE_HLREG0_LPBK; + IXGBE_WRITE_REG(hw, IXGBE_HLREG0, reg_data); + + reg_data = IXGBE_READ_REG(hw, IXGBE_FCTRL); + reg_data |= IXGBE_FCTRL_BAM | IXGBE_FCTRL_SBP | IXGBE_FCTRL_MPE; + IXGBE_WRITE_REG(hw, IXGBE_FCTRL, reg_data); + + reg_data = IXGBE_READ_REG(hw, IXGBE_AUTOC); + reg_data &= ~IXGBE_AUTOC_LMS_MASK; + reg_data |= IXGBE_AUTOC_LMS_10G_LINK_NO_AN | IXGBE_AUTOC_FLU; + IXGBE_WRITE_REG(hw, IXGBE_AUTOC, reg_data); + IXGBE_WRITE_FLUSH(hw); + usleep_range(10000, 20000); + + /* Disable Atlas Tx lanes; re-enabled in reset path */ + if (hw->mac.type == ixgbe_mac_82598EB) { + u8 atlas; + + ixgbe_read_analog_reg8(hw, IXGBE_ATLAS_PDN_LPBK, &atlas); + atlas |= IXGBE_ATLAS_PDN_TX_REG_EN; + ixgbe_write_analog_reg8(hw, IXGBE_ATLAS_PDN_LPBK, atlas); + + ixgbe_read_analog_reg8(hw, IXGBE_ATLAS_PDN_10G, &atlas); + atlas |= IXGBE_ATLAS_PDN_TX_10G_QL_ALL; + ixgbe_write_analog_reg8(hw, IXGBE_ATLAS_PDN_10G, atlas); + + ixgbe_read_analog_reg8(hw, IXGBE_ATLAS_PDN_1G, &atlas); + atlas |= IXGBE_ATLAS_PDN_TX_1G_QL_ALL; + ixgbe_write_analog_reg8(hw, IXGBE_ATLAS_PDN_1G, atlas); + + ixgbe_read_analog_reg8(hw, IXGBE_ATLAS_PDN_AN, &atlas); + atlas |= IXGBE_ATLAS_PDN_TX_AN_QL_ALL; + ixgbe_write_analog_reg8(hw, IXGBE_ATLAS_PDN_AN, atlas); + } + + return 0; +} + +static void ixgbe_loopback_cleanup(struct ixgbe_adapter *adapter) +{ + u32 reg_data; + + reg_data = IXGBE_READ_REG(&adapter->hw, IXGBE_HLREG0); + reg_data &= ~IXGBE_HLREG0_LPBK; + IXGBE_WRITE_REG(&adapter->hw, IXGBE_HLREG0, reg_data); +} + + + + + + +static int ixgbe_loopback_test(struct ixgbe_adapter *adapter, u64 *data) +{ + + //*data = ixgbe_setup_desc_rings(adapter); + //if (*data) + // goto out; + *data = ixgbe_setup_loopback_test(adapter); + if (*data) + goto err_loopback; + //*data = ixgbe_run_loopback_test(adapter); + ixgbe_loopback_cleanup(adapter); + +err_loopback: + //ixgbe_free_desc_rings(adapter); +//out: + return *data; + +} + +#ifndef HAVE_ETHTOOL_GET_SSET_COUNT +static int ixgbe_diag_test_count(struct net_device *netdev) +{ + return IXGBE_TEST_LEN; +} + +#endif /* HAVE_ETHTOOL_GET_SSET_COUNT */ +static void ixgbe_diag_test(struct net_device *netdev, + struct ethtool_test *eth_test, u64 *data) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + bool if_running = netif_running(netdev); + + set_bit(__IXGBE_TESTING, &adapter->state); + if (eth_test->flags == ETH_TEST_FL_OFFLINE) { + /* Offline tests */ + + e_info(hw, "offline testing starting\n"); + + /* Link test performed before hardware reset so autoneg doesn't + * interfere with test result */ + if (ixgbe_link_test(adapter, &data[4])) + eth_test->flags |= ETH_TEST_FL_FAILED; + + if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) { + int i; + for (i = 0; i < adapter->num_vfs; i++) { + if (adapter->vfinfo[i].clear_to_send) { + e_warn(drv, "Please take active VFS " + "offline and restart the " + "adapter before running NIC " + "diagnostics\n"); + data[0] = 1; + data[1] = 1; + data[2] = 1; + data[3] = 1; + eth_test->flags |= ETH_TEST_FL_FAILED; + clear_bit(__IXGBE_TESTING, + &adapter->state); + goto skip_ol_tests; + } + } + } + + if (if_running) + /* indicate we're in test mode */ + dev_close(netdev); + else + ixgbe_reset(adapter); + + e_info(hw, "register testing starting\n"); + if (ixgbe_reg_test(adapter, &data[0])) + eth_test->flags |= ETH_TEST_FL_FAILED; + + ixgbe_reset(adapter); + e_info(hw, "eeprom testing starting\n"); + if (ixgbe_eeprom_test(adapter, &data[1])) + eth_test->flags |= ETH_TEST_FL_FAILED; + + ixgbe_reset(adapter); + e_info(hw, "interrupt testing starting\n"); + if (ixgbe_intr_test(adapter, &data[2])) + eth_test->flags |= ETH_TEST_FL_FAILED; + + /* If SRIOV or VMDq is enabled then skip MAC + * loopback diagnostic. */ + if (adapter->flags & (IXGBE_FLAG_SRIOV_ENABLED | + IXGBE_FLAG_VMDQ_ENABLED)) { + e_info(hw, "skip MAC loopback diagnostic in VT mode\n"); + data[3] = 0; + goto skip_loopback; + } + + ixgbe_reset(adapter); + e_info(hw, "loopback testing starting\n"); + if (ixgbe_loopback_test(adapter, &data[3])) + eth_test->flags |= ETH_TEST_FL_FAILED; + +skip_loopback: + ixgbe_reset(adapter); + + clear_bit(__IXGBE_TESTING, &adapter->state); + if (if_running) + dev_open(netdev); + } else { + e_info(hw, "online testing starting\n"); + /* Online tests */ + if (ixgbe_link_test(adapter, &data[4])) + eth_test->flags |= ETH_TEST_FL_FAILED; + + /* Online tests aren't run; pass by default */ + data[0] = 0; + data[1] = 0; + data[2] = 0; + data[3] = 0; + + clear_bit(__IXGBE_TESTING, &adapter->state); + } +skip_ol_tests: + msleep_interruptible(4 * 1000); +} + +static int ixgbe_wol_exclusion(struct ixgbe_adapter *adapter, + struct ethtool_wolinfo *wol) +{ + struct ixgbe_hw *hw = &adapter->hw; + int retval = 1; + u16 wol_cap = adapter->eeprom_cap & IXGBE_DEVICE_CAPS_WOL_MASK; + + /* WOL not supported except for the following */ + switch (hw->device_id) { + case IXGBE_DEV_ID_82599_SFP: + /* Only these subdevice could supports WOL */ + switch (hw->subsystem_device_id) { + case IXGBE_SUBDEV_ID_82599_560FLR: + /* only support first port */ + if (hw->bus.func != 0) { + wol->supported = 0; + break; + } + case IXGBE_SUBDEV_ID_82599_SFP: + retval = 0; + break; + default: + wol->supported = 0; + break; + } + break; + case IXGBE_DEV_ID_82599_COMBO_BACKPLANE: + /* All except this subdevice support WOL */ + if (hw->subsystem_device_id == + IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ) { + wol->supported = 0; + break; + } + retval = 0; + break; + case IXGBE_DEV_ID_82599_KX4: + retval = 0; + break; + case IXGBE_DEV_ID_X540T: + /* check eeprom to see if enabled wol */ + if ((wol_cap == IXGBE_DEVICE_CAPS_WOL_PORT0_1) || + ((wol_cap == IXGBE_DEVICE_CAPS_WOL_PORT0) && + (hw->bus.func == 0))) { + retval = 0; + break; + } + + /* All others not supported */ + wol->supported = 0; + break; + default: + wol->supported = 0; + } + return retval; +} + +static void ixgbe_get_wol(struct net_device *netdev, + struct ethtool_wolinfo *wol) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + wol->supported = WAKE_UCAST | WAKE_MCAST | + WAKE_BCAST | WAKE_MAGIC; + wol->wolopts = 0; + + if (ixgbe_wol_exclusion(adapter, wol) || + !device_can_wakeup(&adapter->pdev->dev)) + return; + + if (adapter->wol & IXGBE_WUFC_EX) + wol->wolopts |= WAKE_UCAST; + if (adapter->wol & IXGBE_WUFC_MC) + wol->wolopts |= WAKE_MCAST; + if (adapter->wol & IXGBE_WUFC_BC) + wol->wolopts |= WAKE_BCAST; + if (adapter->wol & IXGBE_WUFC_MAG) + wol->wolopts |= WAKE_MAGIC; +} + +static int ixgbe_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + if (wol->wolopts & (WAKE_PHY | WAKE_ARP | WAKE_MAGICSECURE)) + return -EOPNOTSUPP; + + if (ixgbe_wol_exclusion(adapter, wol)) + return wol->wolopts ? -EOPNOTSUPP : 0; + + adapter->wol = 0; + + if (wol->wolopts & WAKE_UCAST) + adapter->wol |= IXGBE_WUFC_EX; + if (wol->wolopts & WAKE_MCAST) + adapter->wol |= IXGBE_WUFC_MC; + if (wol->wolopts & WAKE_BCAST) + adapter->wol |= IXGBE_WUFC_BC; + if (wol->wolopts & WAKE_MAGIC) + adapter->wol |= IXGBE_WUFC_MAG; + + device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol); + + return 0; +} + +static int ixgbe_nway_reset(struct net_device *netdev) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + if (netif_running(netdev)) + ixgbe_reinit_locked(adapter); + + return 0; +} + +#ifdef HAVE_ETHTOOL_SET_PHYS_ID +static int ixgbe_set_phys_id(struct net_device *netdev, + enum ethtool_phys_id_state state) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_hw *hw = &adapter->hw; + + switch (state) { + case ETHTOOL_ID_ACTIVE: + adapter->led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL); + return 2; + + case ETHTOOL_ID_ON: + hw->mac.ops.led_on(hw, IXGBE_LED_ON); + break; + + case ETHTOOL_ID_OFF: + hw->mac.ops.led_off(hw, IXGBE_LED_ON); + break; + + case ETHTOOL_ID_INACTIVE: + /* Restore LED settings */ + IXGBE_WRITE_REG(&adapter->hw, IXGBE_LEDCTL, adapter->led_reg); + break; + } + + return 0; +} +#else +static int ixgbe_phys_id(struct net_device *netdev, u32 data) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_hw *hw = &adapter->hw; + u32 led_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL); + u32 i; + + if (!data || data > 300) + data = 300; + + for (i = 0; i < (data * 1000); i += 400) { + ixgbe_led_on(hw, IXGBE_LED_ON); + msleep_interruptible(200); + ixgbe_led_off(hw, IXGBE_LED_ON); + msleep_interruptible(200); + } + + /* Restore LED settings */ + IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, led_reg); + + return 0; +} +#endif /* HAVE_ETHTOOL_SET_PHYS_ID */ + +static int ixgbe_get_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + ec->tx_max_coalesced_frames_irq = adapter->tx_work_limit; +#ifndef CONFIG_IXGBE_NAPI + ec->rx_max_coalesced_frames_irq = adapter->rx_work_limit; +#endif /* CONFIG_IXGBE_NAPI */ + /* only valid if in constant ITR mode */ + if (adapter->rx_itr_setting <= 1) + ec->rx_coalesce_usecs = adapter->rx_itr_setting; + else + ec->rx_coalesce_usecs = adapter->rx_itr_setting >> 2; + + /* if in mixed tx/rx queues per vector mode, report only rx settings */ + if (adapter->q_vector[0]->tx.count && adapter->q_vector[0]->rx.count) + return 0; + + /* only valid if in constant ITR mode */ + if (adapter->tx_itr_setting <= 1) + ec->tx_coalesce_usecs = adapter->tx_itr_setting; + else + ec->tx_coalesce_usecs = adapter->tx_itr_setting >> 2; + + return 0; +} + +/* + * this function must be called before setting the new value of + * rx_itr_setting + */ +#ifdef NO_VNIC +static bool ixgbe_update_rsc(struct ixgbe_adapter *adapter) +{ + struct net_device *netdev = adapter->netdev; + + /* nothing to do if LRO or RSC are not enabled */ + if (!(adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE) || + !(netdev->features & NETIF_F_LRO)) + return false; + + /* check the feature flag value and enable RSC if necessary */ + if (adapter->rx_itr_setting == 1 || + adapter->rx_itr_setting > IXGBE_MIN_RSC_ITR) { + if (!(adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)) { + adapter->flags2 |= IXGBE_FLAG2_RSC_ENABLED; + e_info(probe, "rx-usecs value high enough " + "to re-enable RSC\n"); + return true; + } + /* if interrupt rate is too high then disable RSC */ + } else if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) { + adapter->flags2 &= ~IXGBE_FLAG2_RSC_ENABLED; +#ifdef IXGBE_NO_LRO + e_info(probe, "rx-usecs set too low, disabling RSC\n"); +#else + e_info(probe, "rx-usecs set too low, " + "falling back to software LRO\n"); +#endif + return true; + } + return false; +} +#endif + +static int ixgbe_set_coalesce(struct net_device *netdev, + struct ethtool_coalesce *ec) +{ +#ifdef NO_VNIC + struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_q_vector *q_vector; + int i; + int num_vectors; + u16 tx_itr_param, rx_itr_param; + bool need_reset = false; + + /* don't accept tx specific changes if we've got mixed RxTx vectors */ + if (adapter->q_vector[0]->tx.count && adapter->q_vector[0]->rx.count + && ec->tx_coalesce_usecs) + return -EINVAL; + + if (ec->tx_max_coalesced_frames_irq) + adapter->tx_work_limit = ec->tx_max_coalesced_frames_irq; + +#ifndef CONFIG_IXGBE_NAPI + if (ec->rx_max_coalesced_frames_irq) + adapter->rx_work_limit = ec->rx_max_coalesced_frames_irq; + +#endif + if ((ec->rx_coalesce_usecs > (IXGBE_MAX_EITR >> 2)) || + (ec->tx_coalesce_usecs > (IXGBE_MAX_EITR >> 2))) + return -EINVAL; + + if (ec->rx_coalesce_usecs > 1) + adapter->rx_itr_setting = ec->rx_coalesce_usecs << 2; + else + adapter->rx_itr_setting = ec->rx_coalesce_usecs; + + if (adapter->rx_itr_setting == 1) + rx_itr_param = IXGBE_20K_ITR; + else + rx_itr_param = adapter->rx_itr_setting; + + if (ec->tx_coalesce_usecs > 1) + adapter->tx_itr_setting = ec->tx_coalesce_usecs << 2; + else + adapter->tx_itr_setting = ec->tx_coalesce_usecs; + + if (adapter->tx_itr_setting == 1) + tx_itr_param = IXGBE_10K_ITR; + else + tx_itr_param = adapter->tx_itr_setting; + + /* check the old value and enable RSC if necessary */ + need_reset = ixgbe_update_rsc(adapter); + + if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) + num_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; + else + num_vectors = 1; + + for (i = 0; i < num_vectors; i++) { + q_vector = adapter->q_vector[i]; + q_vector->tx.work_limit = adapter->tx_work_limit; + q_vector->rx.work_limit = adapter->rx_work_limit; + if (q_vector->tx.count && !q_vector->rx.count) + /* tx only */ + q_vector->itr = tx_itr_param; + else + /* rx only or mixed */ + q_vector->itr = rx_itr_param; + ixgbe_write_eitr(q_vector); + } + + /* + * do reset here at the end to make sure EITR==0 case is handled + * correctly w.r.t stopping tx, and changing TXDCTL.WTHRESH settings + * also locks in RSC enable/disable which requires reset + */ + if (need_reset) + ixgbe_do_reset(netdev); +#endif + return 0; +} + +#ifndef HAVE_NDO_SET_FEATURES +static u32 ixgbe_get_rx_csum(struct net_device *netdev) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_ring *ring = adapter->rx_ring[0]; + return test_bit(__IXGBE_RX_CSUM_ENABLED, &ring->state); +} + +static int ixgbe_set_rx_csum(struct net_device *netdev, u32 data) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + int i; + + for (i = 0; i < adapter->num_rx_queues; i++) { + struct ixgbe_ring *ring = adapter->rx_ring[i]; + if (data) + set_bit(__IXGBE_RX_CSUM_ENABLED, &ring->state); + else + clear_bit(__IXGBE_RX_CSUM_ENABLED, &ring->state); + } + + /* LRO and RSC both depend on RX checksum to function */ + if (!data && (netdev->features & NETIF_F_LRO)) { + netdev->features &= ~NETIF_F_LRO; + + if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) { + adapter->flags2 &= ~IXGBE_FLAG2_RSC_ENABLED; + ixgbe_do_reset(netdev); + } + } + + return 0; +} + +static u32 ixgbe_get_tx_csum(struct net_device *netdev) +{ + return (netdev->features & NETIF_F_IP_CSUM) != 0; +} + +static int ixgbe_set_tx_csum(struct net_device *netdev, u32 data) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + u32 feature_list; + +#ifdef NETIF_F_IPV6_CSUM + feature_list = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM; +#else + feature_list = NETIF_F_IP_CSUM; +#endif + switch (adapter->hw.mac.type) { + case ixgbe_mac_82599EB: + case ixgbe_mac_X540: + feature_list |= NETIF_F_SCTP_CSUM; + break; + default: + break; + } + if (data) + netdev->features |= feature_list; + else + netdev->features &= ~feature_list; + + return 0; +} + +#ifdef NETIF_F_TSO +static int ixgbe_set_tso(struct net_device *netdev, u32 data) +{ + if (data) { + netdev->features |= NETIF_F_TSO; +#ifdef NETIF_F_TSO6 + netdev->features |= NETIF_F_TSO6; +#endif + } else { +#ifndef HAVE_NETDEV_VLAN_FEATURES +#ifdef NETIF_F_HW_VLAN_TX + struct ixgbe_adapter *adapter = netdev_priv(netdev); + /* disable TSO on all VLANs if they're present */ + if (adapter->vlgrp) { + int i; + struct net_device *v_netdev; + for (i = 0; i < VLAN_N_VID; i++) { + v_netdev = + vlan_group_get_device(adapter->vlgrp, i); + if (v_netdev) { + v_netdev->features &= ~NETIF_F_TSO; +#ifdef NETIF_F_TSO6 + v_netdev->features &= ~NETIF_F_TSO6; +#endif + vlan_group_set_device(adapter->vlgrp, i, + v_netdev); + } + } + } +#endif +#endif /* HAVE_NETDEV_VLAN_FEATURES */ + netdev->features &= ~NETIF_F_TSO; +#ifdef NETIF_F_TSO6 + netdev->features &= ~NETIF_F_TSO6; +#endif + } + return 0; +} + +#endif /* NETIF_F_TSO */ +#ifdef ETHTOOL_GFLAGS +static int ixgbe_set_flags(struct net_device *netdev, u32 data) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + u32 supported_flags = ETH_FLAG_RXVLAN | ETH_FLAG_TXVLAN; + u32 changed = netdev->features ^ data; + bool need_reset = false; + int rc; + +#ifndef HAVE_VLAN_RX_REGISTER + if ((adapter->flags & IXGBE_FLAG_DCB_ENABLED) && + !(data & ETH_FLAG_RXVLAN)) + return -EINVAL; + +#endif +#ifdef NETIF_F_RXHASH + if (adapter->flags & IXGBE_FLAG_RSS_ENABLED) + supported_flags |= ETH_FLAG_RXHASH; +#endif +#ifdef IXGBE_NO_LRO + if (adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE) +#endif + supported_flags |= ETH_FLAG_LRO; + +#ifdef ETHTOOL_GRXRINGS + switch (adapter->hw.mac.type) { + case ixgbe_mac_X540: + case ixgbe_mac_82599EB: + supported_flags |= ETH_FLAG_NTUPLE; + default: + break; + } + +#endif + rc = ethtool_op_set_flags(netdev, data, supported_flags); + if (rc) + return rc; + +#ifndef HAVE_VLAN_RX_REGISTER + if (changed & ETH_FLAG_RXVLAN) + ixgbe_vlan_mode(netdev, netdev->features); + +#endif + /* if state changes we need to update adapter->flags and reset */ + if (!(netdev->features & NETIF_F_LRO)) { + if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) + need_reset = true; + adapter->flags2 &= ~IXGBE_FLAG2_RSC_ENABLED; + } else if ((adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE) && + !(adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED)) { + if (adapter->rx_itr_setting == 1 || + adapter->rx_itr_setting > IXGBE_MIN_RSC_ITR) { + adapter->flags2 |= IXGBE_FLAG2_RSC_ENABLED; + need_reset = true; + } else if (changed & ETH_FLAG_LRO) { +#ifdef IXGBE_NO_LRO + e_info(probe, "rx-usecs set too low, " + "disabling RSC\n"); +#else + e_info(probe, "rx-usecs set too low, " + "falling back to software LRO\n"); +#endif + } + } + +#ifdef ETHTOOL_GRXRINGS + /* + * Check if Flow Director n-tuple support was enabled or disabled. If + * the state changed, we need to reset. + */ + if (!(netdev->features & NETIF_F_NTUPLE)) { + if (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE) { + /* turn off Flow Director, set ATR and reset */ + if ((adapter->flags & IXGBE_FLAG_RSS_ENABLED) && + !(adapter->flags & IXGBE_FLAG_DCB_ENABLED)) + adapter->flags |= IXGBE_FLAG_FDIR_HASH_CAPABLE; + need_reset = true; + } + adapter->flags &= ~IXGBE_FLAG_FDIR_PERFECT_CAPABLE; + } else if (!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)) { + /* turn off ATR, enable perfect filters and reset */ + adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE; + adapter->flags |= IXGBE_FLAG_FDIR_PERFECT_CAPABLE; + need_reset = true; + } + +#endif /* ETHTOOL_GRXRINGS */ + if (need_reset) + ixgbe_do_reset(netdev); + + return 0; +} + +#endif /* ETHTOOL_GFLAGS */ +#endif /* HAVE_NDO_SET_FEATURES */ +#ifdef ETHTOOL_GRXRINGS +static int ixgbe_get_ethtool_fdir_entry(struct ixgbe_adapter *adapter, + struct ethtool_rxnfc *cmd) +{ + union ixgbe_atr_input *mask = &adapter->fdir_mask; + struct ethtool_rx_flow_spec *fsp = + (struct ethtool_rx_flow_spec *)&cmd->fs; + struct hlist_node *node, *node2; + struct ixgbe_fdir_filter *rule = NULL; + + /* report total rule count */ + cmd->data = (1024 << adapter->fdir_pballoc) - 2; + + hlist_for_each_entry_safe(rule, node, node2, + &adapter->fdir_filter_list, fdir_node) { + if (fsp->location <= rule->sw_idx) + break; + } + + if (!rule || fsp->location != rule->sw_idx) + return -EINVAL; + + /* fill out the flow spec entry */ + + /* set flow type field */ + switch (rule->filter.formatted.flow_type) { + case IXGBE_ATR_FLOW_TYPE_TCPV4: + fsp->flow_type = TCP_V4_FLOW; + break; + case IXGBE_ATR_FLOW_TYPE_UDPV4: + fsp->flow_type = UDP_V4_FLOW; + break; + case IXGBE_ATR_FLOW_TYPE_SCTPV4: + fsp->flow_type = SCTP_V4_FLOW; + break; + case IXGBE_ATR_FLOW_TYPE_IPV4: + fsp->flow_type = IP_USER_FLOW; + fsp->h_u.usr_ip4_spec.ip_ver = ETH_RX_NFC_IP4; + fsp->h_u.usr_ip4_spec.proto = 0; + fsp->m_u.usr_ip4_spec.proto = 0; + break; + default: + return -EINVAL; + } + + fsp->h_u.tcp_ip4_spec.psrc = rule->filter.formatted.src_port; + fsp->m_u.tcp_ip4_spec.psrc = mask->formatted.src_port; + fsp->h_u.tcp_ip4_spec.pdst = rule->filter.formatted.dst_port; + fsp->m_u.tcp_ip4_spec.pdst = mask->formatted.dst_port; + fsp->h_u.tcp_ip4_spec.ip4src = rule->filter.formatted.src_ip[0]; + fsp->m_u.tcp_ip4_spec.ip4src = mask->formatted.src_ip[0]; + fsp->h_u.tcp_ip4_spec.ip4dst = rule->filter.formatted.dst_ip[0]; + fsp->m_u.tcp_ip4_spec.ip4dst = mask->formatted.dst_ip[0]; + fsp->h_ext.vlan_tci = rule->filter.formatted.vlan_id; + fsp->m_ext.vlan_tci = mask->formatted.vlan_id; + fsp->h_ext.vlan_etype = rule->filter.formatted.flex_bytes; + fsp->m_ext.vlan_etype = mask->formatted.flex_bytes; + fsp->h_ext.data[1] = htonl(rule->filter.formatted.vm_pool); + fsp->m_ext.data[1] = htonl(mask->formatted.vm_pool); + fsp->flow_type |= FLOW_EXT; + + /* record action */ + if (rule->action == IXGBE_FDIR_DROP_QUEUE) + fsp->ring_cookie = RX_CLS_FLOW_DISC; + else + fsp->ring_cookie = rule->action; + + return 0; +} + +static int ixgbe_get_ethtool_fdir_all(struct ixgbe_adapter *adapter, + struct ethtool_rxnfc *cmd, + u32 *rule_locs) +{ + struct hlist_node *node, *node2; + struct ixgbe_fdir_filter *rule; + int cnt = 0; + + /* report total rule count */ + cmd->data = (1024 << adapter->fdir_pballoc) - 2; + + hlist_for_each_entry_safe(rule, node, node2, + &adapter->fdir_filter_list, fdir_node) { + if (cnt == cmd->rule_cnt) + return -EMSGSIZE; + rule_locs[cnt] = rule->sw_idx; + cnt++; + } + + cmd->rule_cnt = cnt; + + return 0; +} + +static int ixgbe_get_rss_hash_opts(struct ixgbe_adapter *adapter, + struct ethtool_rxnfc *cmd) +{ + cmd->data = 0; + + /* if RSS is disabled then report no hashing */ + if (!(adapter->flags & IXGBE_FLAG_RSS_ENABLED)) + return 0; + + /* Report default options for RSS on ixgbe */ + switch (cmd->flow_type) { + case TCP_V4_FLOW: + cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + case UDP_V4_FLOW: + if (adapter->flags2 & IXGBE_FLAG2_RSS_FIELD_IPV4_UDP) + cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + case SCTP_V4_FLOW: + case AH_ESP_V4_FLOW: + case AH_V4_FLOW: + case ESP_V4_FLOW: + case IPV4_FLOW: + cmd->data |= RXH_IP_SRC | RXH_IP_DST; + break; + case TCP_V6_FLOW: + cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + case UDP_V6_FLOW: + if (adapter->flags2 & IXGBE_FLAG2_RSS_FIELD_IPV6_UDP) + cmd->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3; + case SCTP_V6_FLOW: + case AH_ESP_V6_FLOW: + case AH_V6_FLOW: + case ESP_V6_FLOW: + case IPV6_FLOW: + cmd->data |= RXH_IP_SRC | RXH_IP_DST; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int ixgbe_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd, +#ifdef HAVE_ETHTOOL_GET_RXNFC_VOID_RULE_LOCS + void *rule_locs) +#else + u32 *rule_locs) +#endif +{ + struct ixgbe_adapter *adapter = netdev_priv(dev); + int ret = -EOPNOTSUPP; + + switch (cmd->cmd) { + case ETHTOOL_GRXRINGS: + cmd->data = adapter->num_rx_queues; + ret = 0; + break; + case ETHTOOL_GRXCLSRLCNT: + cmd->rule_cnt = adapter->fdir_filter_count; + ret = 0; + break; + case ETHTOOL_GRXCLSRULE: + ret = ixgbe_get_ethtool_fdir_entry(adapter, cmd); + break; + case ETHTOOL_GRXCLSRLALL: + ret = ixgbe_get_ethtool_fdir_all(adapter, cmd, + (u32 *)rule_locs); + break; + case ETHTOOL_GRXFH: + ret = ixgbe_get_rss_hash_opts(adapter, cmd); + break; + default: + break; + } + + return ret; +} + +static int ixgbe_update_ethtool_fdir_entry(struct ixgbe_adapter *adapter, + struct ixgbe_fdir_filter *input, + u16 sw_idx) +{ + struct ixgbe_hw *hw = &adapter->hw; + struct hlist_node *node, *node2, *parent; + struct ixgbe_fdir_filter *rule; + int err = -EINVAL; + + parent = NULL; + rule = NULL; + + hlist_for_each_entry_safe(rule, node, node2, + &adapter->fdir_filter_list, fdir_node) { + /* hash found, or no matching entry */ + if (rule->sw_idx >= sw_idx) + break; + parent = node; + } + + /* if there is an old rule occupying our place remove it */ + if (rule && (rule->sw_idx == sw_idx)) { + if (!input || (rule->filter.formatted.bkt_hash != + input->filter.formatted.bkt_hash)) { + err = ixgbe_fdir_erase_perfect_filter_82599(hw, + &rule->filter, + sw_idx); + } + + hlist_del(&rule->fdir_node); + kfree(rule); + adapter->fdir_filter_count--; + } + + /* + * If no input this was a delete, err should be 0 if a rule was + * successfully found and removed from the list else -EINVAL + */ + if (!input) + return err; + + /* initialize node and set software index */ + INIT_HLIST_NODE(&input->fdir_node); + + /* add filter to the list */ + if (parent) + hlist_add_after(parent, &input->fdir_node); + else + hlist_add_head(&input->fdir_node, + &adapter->fdir_filter_list); + + /* update counts */ + adapter->fdir_filter_count++; + + return 0; +} + +static int ixgbe_flowspec_to_flow_type(struct ethtool_rx_flow_spec *fsp, + u8 *flow_type) +{ + switch (fsp->flow_type & ~FLOW_EXT) { + case TCP_V4_FLOW: + *flow_type = IXGBE_ATR_FLOW_TYPE_TCPV4; + break; + case UDP_V4_FLOW: + *flow_type = IXGBE_ATR_FLOW_TYPE_UDPV4; + break; + case SCTP_V4_FLOW: + *flow_type = IXGBE_ATR_FLOW_TYPE_SCTPV4; + break; + case IP_USER_FLOW: + switch (fsp->h_u.usr_ip4_spec.proto) { + case IPPROTO_TCP: + *flow_type = IXGBE_ATR_FLOW_TYPE_TCPV4; + break; + case IPPROTO_UDP: + *flow_type = IXGBE_ATR_FLOW_TYPE_UDPV4; + break; + case IPPROTO_SCTP: + *flow_type = IXGBE_ATR_FLOW_TYPE_SCTPV4; + break; + case 0: + if (!fsp->m_u.usr_ip4_spec.proto) { + *flow_type = IXGBE_ATR_FLOW_TYPE_IPV4; + break; + } + default: + return 0; + } + break; + default: + return 0; + } + + return 1; +} + +static int ixgbe_add_ethtool_fdir_entry(struct ixgbe_adapter *adapter, + struct ethtool_rxnfc *cmd) +{ + struct ethtool_rx_flow_spec *fsp = + (struct ethtool_rx_flow_spec *)&cmd->fs; + struct ixgbe_hw *hw = &adapter->hw; + struct ixgbe_fdir_filter *input; + union ixgbe_atr_input mask; + int err; + + if (!(adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE)) + return -EOPNOTSUPP; + + /* + * Don't allow programming if the action is a queue greater than + * the number of online Rx queues. + */ + if ((fsp->ring_cookie != RX_CLS_FLOW_DISC) && + (fsp->ring_cookie >= adapter->num_rx_queues)) + return -EINVAL; + + /* Don't allow indexes to exist outside of available space */ + if (fsp->location >= ((1024 << adapter->fdir_pballoc) - 2)) { + e_err(drv, "Location out of range\n"); + return -EINVAL; + } + + input = kzalloc(sizeof(*input), GFP_ATOMIC); + if (!input) + return -ENOMEM; + + memset(&mask, 0, sizeof(union ixgbe_atr_input)); + + /* set SW index */ + input->sw_idx = fsp->location; + + /* record flow type */ + if (!ixgbe_flowspec_to_flow_type(fsp, + &input->filter.formatted.flow_type)) { + e_err(drv, "Unrecognized flow type\n"); + goto err_out; + } + + mask.formatted.flow_type = IXGBE_ATR_L4TYPE_IPV6_MASK | + IXGBE_ATR_L4TYPE_MASK; + + if (input->filter.formatted.flow_type == IXGBE_ATR_FLOW_TYPE_IPV4) + mask.formatted.flow_type &= IXGBE_ATR_L4TYPE_IPV6_MASK; + + /* Copy input into formatted structures */ + input->filter.formatted.src_ip[0] = fsp->h_u.tcp_ip4_spec.ip4src; + mask.formatted.src_ip[0] = fsp->m_u.tcp_ip4_spec.ip4src; + input->filter.formatted.dst_ip[0] = fsp->h_u.tcp_ip4_spec.ip4dst; + mask.formatted.dst_ip[0] = fsp->m_u.tcp_ip4_spec.ip4dst; + input->filter.formatted.src_port = fsp->h_u.tcp_ip4_spec.psrc; + mask.formatted.src_port = fsp->m_u.tcp_ip4_spec.psrc; + input->filter.formatted.dst_port = fsp->h_u.tcp_ip4_spec.pdst; + mask.formatted.dst_port = fsp->m_u.tcp_ip4_spec.pdst; + + if (fsp->flow_type & FLOW_EXT) { + input->filter.formatted.vm_pool = + (unsigned char)ntohl(fsp->h_ext.data[1]); + mask.formatted.vm_pool = + (unsigned char)ntohl(fsp->m_ext.data[1]); + input->filter.formatted.vlan_id = fsp->h_ext.vlan_tci; + mask.formatted.vlan_id = fsp->m_ext.vlan_tci; + input->filter.formatted.flex_bytes = + fsp->h_ext.vlan_etype; + mask.formatted.flex_bytes = fsp->m_ext.vlan_etype; + } + + /* determine if we need to drop or route the packet */ + if (fsp->ring_cookie == RX_CLS_FLOW_DISC) + input->action = IXGBE_FDIR_DROP_QUEUE; + else + input->action = fsp->ring_cookie; + + spin_lock(&adapter->fdir_perfect_lock); + + if (hlist_empty(&adapter->fdir_filter_list)) { + /* save mask and program input mask into HW */ + memcpy(&adapter->fdir_mask, &mask, sizeof(mask)); + err = ixgbe_fdir_set_input_mask_82599(hw, &mask); + if (err) { + e_err(drv, "Error writing mask\n"); + goto err_out_w_lock; + } + } else if (memcmp(&adapter->fdir_mask, &mask, sizeof(mask))) { + e_err(drv, "Only one mask supported per port\n"); + goto err_out_w_lock; + } + + /* apply mask and compute/store hash */ + ixgbe_atr_compute_perfect_hash_82599(&input->filter, &mask); + + /* program filters to filter memory */ + err = ixgbe_fdir_write_perfect_filter_82599(hw, + &input->filter, input->sw_idx, + (input->action == IXGBE_FDIR_DROP_QUEUE) ? + IXGBE_FDIR_DROP_QUEUE : + adapter->rx_ring[input->action]->reg_idx); + if (err) + goto err_out_w_lock; + + ixgbe_update_ethtool_fdir_entry(adapter, input, input->sw_idx); + + spin_unlock(&adapter->fdir_perfect_lock); + + kfree(input); + return err; +err_out_w_lock: + spin_unlock(&adapter->fdir_perfect_lock); +err_out: + kfree(input); + return -EINVAL; +} + +static int ixgbe_del_ethtool_fdir_entry(struct ixgbe_adapter *adapter, + struct ethtool_rxnfc *cmd) +{ + struct ethtool_rx_flow_spec *fsp = + (struct ethtool_rx_flow_spec *)&cmd->fs; + int err; + + spin_lock(&adapter->fdir_perfect_lock); + err = ixgbe_update_ethtool_fdir_entry(adapter, NULL, (u16)(fsp->location)); + spin_unlock(&adapter->fdir_perfect_lock); + + return err; +} + +#ifdef ETHTOOL_SRXNTUPLE +/* + * We need to keep this around for kernels 2.6.33 - 2.6.39 in order to avoid + * a null pointer dereference as it was assumend if the NETIF_F_NTUPLE flag + * was defined that this function was present. + */ +static int ixgbe_set_rx_ntuple(struct net_device *dev, + struct ethtool_rx_ntuple *cmd) +{ + return -EOPNOTSUPP; +} + +#endif +#define UDP_RSS_FLAGS (IXGBE_FLAG2_RSS_FIELD_IPV4_UDP | \ + IXGBE_FLAG2_RSS_FIELD_IPV6_UDP) +static int ixgbe_set_rss_hash_opt(struct ixgbe_adapter *adapter, + struct ethtool_rxnfc *nfc) +{ + u32 flags2 = adapter->flags2; + + /* + * RSS does not support anything other than hashing + * to queues on src and dst IPs and ports + */ + if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST | + RXH_L4_B_0_1 | RXH_L4_B_2_3)) + return -EINVAL; + + switch (nfc->flow_type) { + case TCP_V4_FLOW: + case TCP_V6_FLOW: + if (!(nfc->data & RXH_IP_SRC) || + !(nfc->data & RXH_IP_DST) || + !(nfc->data & RXH_L4_B_0_1) || + !(nfc->data & RXH_L4_B_2_3)) + return -EINVAL; + break; + case UDP_V4_FLOW: + if (!(nfc->data & RXH_IP_SRC) || + !(nfc->data & RXH_IP_DST)) + return -EINVAL; + switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { + case 0: + flags2 &= ~IXGBE_FLAG2_RSS_FIELD_IPV4_UDP; + break; + case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + flags2 |= IXGBE_FLAG2_RSS_FIELD_IPV4_UDP; + break; + default: + return -EINVAL; + } + break; + case UDP_V6_FLOW: + if (!(nfc->data & RXH_IP_SRC) || + !(nfc->data & RXH_IP_DST)) + return -EINVAL; + switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) { + case 0: + flags2 &= ~IXGBE_FLAG2_RSS_FIELD_IPV6_UDP; + break; + case (RXH_L4_B_0_1 | RXH_L4_B_2_3): + flags2 |= IXGBE_FLAG2_RSS_FIELD_IPV6_UDP; + break; + default: + return -EINVAL; + } + break; + case AH_ESP_V4_FLOW: + case AH_V4_FLOW: + case ESP_V4_FLOW: + case SCTP_V4_FLOW: + case AH_ESP_V6_FLOW: + case AH_V6_FLOW: + case ESP_V6_FLOW: + case SCTP_V6_FLOW: + if (!(nfc->data & RXH_IP_SRC) || + !(nfc->data & RXH_IP_DST) || + (nfc->data & RXH_L4_B_0_1) || + (nfc->data & RXH_L4_B_2_3)) + return -EINVAL; + break; + default: + return -EINVAL; + } + + /* if we changed something we need to update flags */ + if (flags2 != adapter->flags2) { + struct ixgbe_hw *hw = &adapter->hw; + u32 mrqc = IXGBE_READ_REG(hw, IXGBE_MRQC); + + if ((flags2 & UDP_RSS_FLAGS) && + !(adapter->flags2 & UDP_RSS_FLAGS)) + e_warn(drv, "enabling UDP RSS: fragmented packets" + " may arrive out of order to the stack above\n"); + + adapter->flags2 = flags2; + + /* Perform hash on these packet types */ + mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4 + | IXGBE_MRQC_RSS_FIELD_IPV4_TCP + | IXGBE_MRQC_RSS_FIELD_IPV6 + | IXGBE_MRQC_RSS_FIELD_IPV6_TCP; + + mrqc &= ~(IXGBE_MRQC_RSS_FIELD_IPV4_UDP | + IXGBE_MRQC_RSS_FIELD_IPV6_UDP); + + if (flags2 & IXGBE_FLAG2_RSS_FIELD_IPV4_UDP) + mrqc |= IXGBE_MRQC_RSS_FIELD_IPV4_UDP; + + if (flags2 & IXGBE_FLAG2_RSS_FIELD_IPV6_UDP) + mrqc |= IXGBE_MRQC_RSS_FIELD_IPV6_UDP; + + IXGBE_WRITE_REG(hw, IXGBE_MRQC, mrqc); + } + + return 0; +} + +static int ixgbe_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd) +{ + struct ixgbe_adapter *adapter = netdev_priv(dev); + int ret = -EOPNOTSUPP; + + switch (cmd->cmd) { + case ETHTOOL_SRXCLSRLINS: + ret = ixgbe_add_ethtool_fdir_entry(adapter, cmd); + break; + case ETHTOOL_SRXCLSRLDEL: + ret = ixgbe_del_ethtool_fdir_entry(adapter, cmd); + break; + case ETHTOOL_SRXFH: + ret = ixgbe_set_rss_hash_opt(adapter, cmd); + break; + default: + break; + } + + return ret; +} + +#endif /* ETHTOOL_GRXRINGS */ +//static +struct ethtool_ops ixgbe_ethtool_ops = { + .get_settings = ixgbe_get_settings, + .set_settings = ixgbe_set_settings, + .get_drvinfo = ixgbe_get_drvinfo, + .get_regs_len = ixgbe_get_regs_len, + .get_regs = ixgbe_get_regs, + .get_wol = ixgbe_get_wol, + .set_wol = ixgbe_set_wol, + .nway_reset = ixgbe_nway_reset, + .get_link = ethtool_op_get_link, + .get_eeprom_len = ixgbe_get_eeprom_len, + .get_eeprom = ixgbe_get_eeprom, + .set_eeprom = ixgbe_set_eeprom, + .get_ringparam = ixgbe_get_ringparam, + .set_ringparam = ixgbe_set_ringparam, + .get_pauseparam = ixgbe_get_pauseparam, + .set_pauseparam = ixgbe_set_pauseparam, + .get_msglevel = ixgbe_get_msglevel, + .set_msglevel = ixgbe_set_msglevel, +#ifndef HAVE_ETHTOOL_GET_SSET_COUNT + .self_test_count = ixgbe_diag_test_count, +#endif /* HAVE_ETHTOOL_GET_SSET_COUNT */ + .self_test = ixgbe_diag_test, + .get_strings = ixgbe_get_strings, +#ifdef HAVE_ETHTOOL_SET_PHYS_ID + .set_phys_id = ixgbe_set_phys_id, +#else + .phys_id = ixgbe_phys_id, +#endif /* HAVE_ETHTOOL_SET_PHYS_ID */ +#ifndef HAVE_ETHTOOL_GET_SSET_COUNT + .get_stats_count = ixgbe_get_stats_count, +#else /* HAVE_ETHTOOL_GET_SSET_COUNT */ + .get_sset_count = ixgbe_get_sset_count, +#endif /* HAVE_ETHTOOL_GET_SSET_COUNT */ + .get_ethtool_stats = ixgbe_get_ethtool_stats, +#ifdef HAVE_ETHTOOL_GET_PERM_ADDR + .get_perm_addr = ethtool_op_get_perm_addr, +#endif + .get_coalesce = ixgbe_get_coalesce, + .set_coalesce = ixgbe_set_coalesce, +#ifndef HAVE_NDO_SET_FEATURES + .get_rx_csum = ixgbe_get_rx_csum, + .set_rx_csum = ixgbe_set_rx_csum, + .get_tx_csum = ixgbe_get_tx_csum, + .set_tx_csum = ixgbe_set_tx_csum, + .get_sg = ethtool_op_get_sg, + .set_sg = ethtool_op_set_sg, +#ifdef NETIF_F_TSO + .get_tso = ethtool_op_get_tso, + .set_tso = ixgbe_set_tso, +#endif +#ifdef ETHTOOL_GFLAGS + .get_flags = ethtool_op_get_flags, + .set_flags = ixgbe_set_flags, +#endif +#endif /* HAVE_NDO_SET_FEATURES */ +#ifdef ETHTOOL_GRXRINGS + .get_rxnfc = ixgbe_get_rxnfc, + .set_rxnfc = ixgbe_set_rxnfc, +#ifdef ETHTOOL_SRXNTUPLE + .set_rx_ntuple = ixgbe_set_rx_ntuple, +#endif +#endif +}; + +void ixgbe_set_ethtool_ops(struct net_device *netdev) +{ + SET_ETHTOOL_OPS(netdev, &ixgbe_ethtool_ops); +} +#endif /* SIOCETHTOOL */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h new file mode 100644 index 00000000..cad28622 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_fcoe.h @@ -0,0 +1,91 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _IXGBE_FCOE_H +#define _IXGBE_FCOE_H + +#ifdef IXGBE_FCOE + +#include +#include + +/* shift bits within STAT fo FCSTAT */ +#define IXGBE_RXDADV_FCSTAT_SHIFT 4 + +/* ddp user buffer */ +#define IXGBE_BUFFCNT_MAX 256 /* 8 bits bufcnt */ +#define IXGBE_FCPTR_ALIGN 16 +#define IXGBE_FCPTR_MAX (IXGBE_BUFFCNT_MAX * sizeof(dma_addr_t)) +#define IXGBE_FCBUFF_4KB 0x0 +#define IXGBE_FCBUFF_8KB 0x1 +#define IXGBE_FCBUFF_16KB 0x2 +#define IXGBE_FCBUFF_64KB 0x3 +#define IXGBE_FCBUFF_MAX 65536 /* 64KB max */ +#define IXGBE_FCBUFF_MIN 4096 /* 4KB min */ +#define IXGBE_FCOE_DDP_MAX 512 /* 9 bits xid */ + +/* Default traffic class to use for FCoE */ +#define IXGBE_FCOE_DEFTC 3 + +/* fcerr */ +#define IXGBE_FCERR_BADCRC 0x00100000 +#define IXGBE_FCERR_EOFSOF 0x00200000 +#define IXGBE_FCERR_NOFIRST 0x00300000 +#define IXGBE_FCERR_OOOSEQ 0x00400000 +#define IXGBE_FCERR_NODMA 0x00500000 +#define IXGBE_FCERR_PKTLOST 0x00600000 + +/* FCoE DDP for target mode */ +#define __IXGBE_FCOE_TARGET 1 + +struct ixgbe_fcoe_ddp { + int len; + u32 err; + unsigned int sgc; + struct scatterlist *sgl; + dma_addr_t udp; + u64 *udl; + struct pci_pool *pool; +}; + +struct ixgbe_fcoe { + struct pci_pool **pool; + atomic_t refcnt; + spinlock_t lock; + struct ixgbe_fcoe_ddp ddp[IXGBE_FCOE_DDP_MAX]; + unsigned char *extra_ddp_buffer; + dma_addr_t extra_ddp_buffer_dma; + u64 __percpu *pcpu_noddp; + u64 __percpu *pcpu_noddp_ext_buff; + unsigned long mode; + u8 tc; + u8 up; + u8 up_set; +}; +#endif /* IXGBE_FCOE */ + +#endif /* _IXGBE_FCOE_H */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c new file mode 100644 index 00000000..8c1d2fe3 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_main.c @@ -0,0 +1,2970 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +/****************************************************************************** + Copyright (c)2006 - 2007 Myricom, Inc. for some LRO specific code +******************************************************************************/ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef HAVE_SCTP +#include +#endif +#include +#include +#ifdef NETIF_F_TSO +#include +#ifdef NETIF_F_TSO6 +#include +#endif +#endif +#ifdef SIOCETHTOOL +#include +#endif + +#include "ixgbe.h" + +#undef CONFIG_DCA +#undef CONFIG_DCA_MODULE + +char ixgbe_driver_name[] = "ixgbe"; +static const char ixgbe_driver_string[] = + "Intel(R) 10 Gigabit PCI Express Network Driver"; +#define DRV_HW_PERF + +#ifndef CONFIG_IXGBE_NAPI +#define DRIVERNAPI +#else +#define DRIVERNAPI "-NAPI" +#endif + +#define FPGA + +#define VMDQ_TAG + +#define MAJ 3 +#define MIN 9 +#define BUILD 17 +#define DRV_VERSION __stringify(MAJ) "." __stringify(MIN) "." \ + __stringify(BUILD) DRIVERNAPI DRV_HW_PERF FPGA VMDQ_TAG +const char ixgbe_driver_version[] = DRV_VERSION; +static const char ixgbe_copyright[] = + "Copyright (c) 1999-2012 Intel Corporation."; + +/* ixgbe_pci_tbl - PCI Device ID Table + * + * Wildcard entries (PCI_ANY_ID) should come last + * Last entry must be all 0s + * + * { Vendor ID, Device ID, SubVendor ID, SubDevice ID, + * Class, Class Mask, private data (not used) } + */ +DEFINE_PCI_DEVICE_TABLE(ixgbe_pci_tbl) = { + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598AF_DUAL_PORT)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598AF_SINGLE_PORT)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598AT)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598AT2)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598EB_CX4)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598_CX4_DUAL_PORT)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598_DA_DUAL_PORT)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598EB_XF_LR)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598EB_SFP_LOM)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82598_BX)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_KX4)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_XAUI_LOM)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_KR)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_SFP)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_SFP_EM)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_KX4_MEZZ)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_CX4)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_BACKPLANE_FCOE)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_SFP_FCOE)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_T3_LOM)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_COMBO_BACKPLANE)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_X540T)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_SFP_SF2)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_LS)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599EN_SFP)}, + {PCI_VDEVICE(INTEL, IXGBE_DEV_ID_82599_QSFP_SF_QP)}, + /* required last entry */ + {0, } +}; + +#if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE) +static int ixgbe_notify_dca(struct notifier_block *, unsigned long event, + void *p); +static struct notifier_block dca_notifier = { + .notifier_call = ixgbe_notify_dca, + .next = NULL, + .priority = 0 +}; + +#endif +MODULE_AUTHOR("Intel Corporation, "); +MODULE_DESCRIPTION("Intel(R) 10 Gigabit PCI Express Network Driver"); +MODULE_LICENSE("GPL"); +MODULE_VERSION(DRV_VERSION); + +#define DEFAULT_DEBUG_LEVEL_SHIFT 3 + + +static void ixgbe_release_hw_control(struct ixgbe_adapter *adapter) +{ + u32 ctrl_ext; + + /* Let firmware take over control of h/w */ + ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT); + IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, + ctrl_ext & ~IXGBE_CTRL_EXT_DRV_LOAD); +} + +#ifdef NO_VNIC +static void ixgbe_get_hw_control(struct ixgbe_adapter *adapter) +{ + u32 ctrl_ext; + + /* Let firmware know the driver has taken over */ + ctrl_ext = IXGBE_READ_REG(&adapter->hw, IXGBE_CTRL_EXT); + IXGBE_WRITE_REG(&adapter->hw, IXGBE_CTRL_EXT, + ctrl_ext | IXGBE_CTRL_EXT_DRV_LOAD); +} +#endif + + +static void ixgbe_update_xoff_rx_lfc(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + struct ixgbe_hw_stats *hwstats = &adapter->stats; + int i; + u32 data; + + if ((hw->fc.current_mode != ixgbe_fc_full) && + (hw->fc.current_mode != ixgbe_fc_rx_pause)) + return; + + switch (hw->mac.type) { + case ixgbe_mac_82598EB: + data = IXGBE_READ_REG(hw, IXGBE_LXOFFRXC); + break; + default: + data = IXGBE_READ_REG(hw, IXGBE_LXOFFRXCNT); + } + hwstats->lxoffrxc += data; + + /* refill credits (no tx hang) if we received xoff */ + if (!data) + return; + + for (i = 0; i < adapter->num_tx_queues; i++) + clear_bit(__IXGBE_HANG_CHECK_ARMED, + &adapter->tx_ring[i]->state); +} + +static void ixgbe_update_xoff_received(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + struct ixgbe_hw_stats *hwstats = &adapter->stats; + u32 xoff[8] = {0}; + int i; + bool pfc_en = adapter->dcb_cfg.pfc_mode_enable; + +#ifdef HAVE_DCBNL_IEEE + if (adapter->ixgbe_ieee_pfc) + pfc_en |= !!(adapter->ixgbe_ieee_pfc->pfc_en); + +#endif + if (!(adapter->flags & IXGBE_FLAG_DCB_ENABLED) || !pfc_en) { + ixgbe_update_xoff_rx_lfc(adapter); + return; + } + + /* update stats for each tc, only valid with PFC enabled */ + for (i = 0; i < MAX_TX_PACKET_BUFFERS; i++) { + switch (hw->mac.type) { + case ixgbe_mac_82598EB: + xoff[i] = IXGBE_READ_REG(hw, IXGBE_PXOFFRXC(i)); + break; + default: + xoff[i] = IXGBE_READ_REG(hw, IXGBE_PXOFFRXCNT(i)); + } + hwstats->pxoffrxc[i] += xoff[i]; + } + + /* disarm tx queues that have received xoff frames */ + for (i = 0; i < adapter->num_tx_queues; i++) { + struct ixgbe_ring *tx_ring = adapter->tx_ring[i]; + u8 tc = tx_ring->dcb_tc; + + if ((tc <= 7) && (xoff[tc])) + clear_bit(__IXGBE_HANG_CHECK_ARMED, &tx_ring->state); + } +} + + + + +#define IXGBE_SRRCTL_BSIZEHDRSIZE_SHIFT 2 + + + + +#ifdef HAVE_8021P_SUPPORT +/** + * ixgbe_vlan_stripping_disable - helper to disable vlan tag stripping + * @adapter: driver data + */ +void ixgbe_vlan_stripping_disable(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + u32 vlnctrl; + int i; + + /* leave vlan tag stripping enabled for DCB */ + if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) + return; + + switch (hw->mac.type) { + case ixgbe_mac_82598EB: + vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); + vlnctrl &= ~IXGBE_VLNCTRL_VME; + IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); + break; + case ixgbe_mac_82599EB: + case ixgbe_mac_X540: + for (i = 0; i < adapter->num_rx_queues; i++) { + u8 reg_idx = adapter->rx_ring[i]->reg_idx; + vlnctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(reg_idx)); + vlnctrl &= ~IXGBE_RXDCTL_VME; + IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), vlnctrl); + } + break; + default: + break; + } +} + +#endif +/** + * ixgbe_vlan_stripping_enable - helper to enable vlan tag stripping + * @adapter: driver data + */ +void ixgbe_vlan_stripping_enable(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + u32 vlnctrl; + int i; + + switch (hw->mac.type) { + case ixgbe_mac_82598EB: + vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); + vlnctrl |= IXGBE_VLNCTRL_VME; + IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); + break; + case ixgbe_mac_82599EB: + case ixgbe_mac_X540: + for (i = 0; i < adapter->num_rx_queues; i++) { + u8 reg_idx = adapter->rx_ring[i]->reg_idx; + vlnctrl = IXGBE_READ_REG(hw, IXGBE_RXDCTL(reg_idx)); + vlnctrl |= IXGBE_RXDCTL_VME; + IXGBE_WRITE_REG(hw, IXGBE_RXDCTL(reg_idx), vlnctrl); + } + break; + default: + break; + } +} + +#ifdef HAVE_VLAN_RX_REGISTER +void ixgbe_vlan_mode(struct net_device *netdev, struct vlan_group *grp) +#else +void ixgbe_vlan_mode(struct net_device *netdev, u32 features) +#endif +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); +#ifdef HAVE_8021P_SUPPORT + bool enable; +#endif +#ifdef HAVE_VLAN_RX_REGISTER + + //if (!test_bit(__IXGBE_DOWN, &adapter->state)) + // ixgbe_irq_disable(adapter); + + adapter->vlgrp = grp; + + //if (!test_bit(__IXGBE_DOWN, &adapter->state)) + // ixgbe_irq_enable(adapter, true, true); +#endif +#ifdef HAVE_8021P_SUPPORT +#ifdef HAVE_VLAN_RX_REGISTER + enable = (grp || (adapter->flags & IXGBE_FLAG_DCB_ENABLED)); +#else + enable = !!(features & NETIF_F_HW_VLAN_RX); +#endif + if (enable) + /* enable VLAN tag insert/strip */ + ixgbe_vlan_stripping_enable(adapter); + else + /* disable VLAN tag insert/strip */ + ixgbe_vlan_stripping_disable(adapter); + +#endif +} + +static u8 *ixgbe_addr_list_itr(struct ixgbe_hw *hw, u8 **mc_addr_ptr, u32 *vmdq) +{ +#ifdef NETDEV_HW_ADDR_T_MULTICAST + struct netdev_hw_addr *mc_ptr; +#else + struct dev_mc_list *mc_ptr; +#endif + struct ixgbe_adapter *adapter = hw->back; + u8 *addr = *mc_addr_ptr; + + *vmdq = adapter->num_vfs; + +#ifdef NETDEV_HW_ADDR_T_MULTICAST + mc_ptr = container_of(addr, struct netdev_hw_addr, addr[0]); + if (mc_ptr->list.next) { + struct netdev_hw_addr *ha; + + ha = list_entry(mc_ptr->list.next, struct netdev_hw_addr, list); + *mc_addr_ptr = ha->addr; + } +#else + mc_ptr = container_of(addr, struct dev_mc_list, dmi_addr[0]); + if (mc_ptr->next) + *mc_addr_ptr = mc_ptr->next->dmi_addr; +#endif + else + *mc_addr_ptr = NULL; + + return addr; +} + +/** + * ixgbe_write_mc_addr_list - write multicast addresses to MTA + * @netdev: network interface device structure + * + * Writes multicast address list to the MTA hash table. + * Returns: -ENOMEM on failure + * 0 on no addresses written + * X on writing X addresses to MTA + **/ +int ixgbe_write_mc_addr_list(struct net_device *netdev) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_hw *hw = &adapter->hw; +#ifdef NETDEV_HW_ADDR_T_MULTICAST + struct netdev_hw_addr *ha; +#endif + u8 *addr_list = NULL; + int addr_count = 0; + + if (!hw->mac.ops.update_mc_addr_list) + return -ENOMEM; + + if (!netif_running(netdev)) + return 0; + + + hw->mac.ops.update_mc_addr_list(hw, NULL, 0, + ixgbe_addr_list_itr, true); + + if (!netdev_mc_empty(netdev)) { +#ifdef NETDEV_HW_ADDR_T_MULTICAST + ha = list_first_entry(&netdev->mc.list, + struct netdev_hw_addr, list); + addr_list = ha->addr; +#else + addr_list = netdev->mc_list->dmi_addr; +#endif + addr_count = netdev_mc_count(netdev); + + hw->mac.ops.update_mc_addr_list(hw, addr_list, addr_count, + ixgbe_addr_list_itr, false); + } + +#ifdef CONFIG_PCI_IOV + //ixgbe_restore_vf_multicasts(adapter); +#endif + return addr_count; +} + + +void ixgbe_full_sync_mac_table(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + int i; + for (i = 0; i < hw->mac.num_rar_entries; i++) { + if (adapter->mac_table[i].state & IXGBE_MAC_STATE_IN_USE) { + hw->mac.ops.set_rar(hw, i, adapter->mac_table[i].addr, + adapter->mac_table[i].queue, + IXGBE_RAH_AV); + } else { + hw->mac.ops.clear_rar(hw, i); + } + } +} + +void ixgbe_sync_mac_table(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + int i; + for (i = 0; i < hw->mac.num_rar_entries; i++) { + if (adapter->mac_table[i].state & IXGBE_MAC_STATE_MODIFIED) { + if (adapter->mac_table[i].state & + IXGBE_MAC_STATE_IN_USE) { + hw->mac.ops.set_rar(hw, i, + adapter->mac_table[i].addr, + adapter->mac_table[i].queue, + IXGBE_RAH_AV); + } else { + hw->mac.ops.clear_rar(hw, i); + } + adapter->mac_table[i].state &= + ~(IXGBE_MAC_STATE_MODIFIED); + } + } +} + +int ixgbe_available_rars(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + int i, count = 0; + + for (i = 0; i < hw->mac.num_rar_entries; i++) { + if (adapter->mac_table[i].state == 0) + count++; + } + return count; +} + +int ixgbe_add_mac_filter(struct ixgbe_adapter *adapter, u8 *addr, u16 queue) +{ + struct ixgbe_hw *hw = &adapter->hw; + int i; + + if (is_zero_ether_addr(addr)) + return 0; + + for (i = 0; i < hw->mac.num_rar_entries; i++) { + if (adapter->mac_table[i].state & IXGBE_MAC_STATE_IN_USE) + continue; + adapter->mac_table[i].state |= (IXGBE_MAC_STATE_MODIFIED | + IXGBE_MAC_STATE_IN_USE); + memcpy(adapter->mac_table[i].addr, addr, ETH_ALEN); + adapter->mac_table[i].queue = queue; + ixgbe_sync_mac_table(adapter); + return i; + } + return -ENOMEM; +} + +void ixgbe_flush_sw_mac_table(struct ixgbe_adapter *adapter) +{ + int i; + struct ixgbe_hw *hw = &adapter->hw; + + for (i = 0; i < hw->mac.num_rar_entries; i++) { + adapter->mac_table[i].state |= IXGBE_MAC_STATE_MODIFIED; + adapter->mac_table[i].state &= ~IXGBE_MAC_STATE_IN_USE; + memset(adapter->mac_table[i].addr, 0, ETH_ALEN); + adapter->mac_table[i].queue = 0; + } + ixgbe_sync_mac_table(adapter); +} + +void ixgbe_del_mac_filter_by_index(struct ixgbe_adapter *adapter, int index) +{ + adapter->mac_table[index].state |= IXGBE_MAC_STATE_MODIFIED; + adapter->mac_table[index].state &= ~IXGBE_MAC_STATE_IN_USE; + memset(adapter->mac_table[index].addr, 0, ETH_ALEN); + adapter->mac_table[index].queue = 0; + ixgbe_sync_mac_table(adapter); +} + +int ixgbe_del_mac_filter(struct ixgbe_adapter *adapter, u8* addr, u16 queue) +{ + /* search table for addr, if found, set to 0 and sync */ + int i; + struct ixgbe_hw *hw = &adapter->hw; + + if (is_zero_ether_addr(addr)) + return 0; + for (i = 0; i < hw->mac.num_rar_entries; i++) { + if (ether_addr_equal(addr, adapter->mac_table[i].addr) && + adapter->mac_table[i].queue == queue) { + adapter->mac_table[i].state |= IXGBE_MAC_STATE_MODIFIED; + adapter->mac_table[i].state &= ~IXGBE_MAC_STATE_IN_USE; + memset(adapter->mac_table[i].addr, 0, ETH_ALEN); + adapter->mac_table[i].queue = 0; + ixgbe_sync_mac_table(adapter); + return 0; + } + } + return -ENOMEM; +} +#ifdef HAVE_SET_RX_MODE +/** + * ixgbe_write_uc_addr_list - write unicast addresses to RAR table + * @netdev: network interface device structure + * + * Writes unicast address list to the RAR table. + * Returns: -ENOMEM on failure/insufficient address space + * 0 on no addresses written + * X on writing X addresses to the RAR table + **/ +int ixgbe_write_uc_addr_list(struct ixgbe_adapter *adapter, + struct net_device *netdev, unsigned int vfn) +{ + int count = 0; + + /* return ENOMEM indicating insufficient memory for addresses */ + if (netdev_uc_count(netdev) > ixgbe_available_rars(adapter)) + return -ENOMEM; + + if (!netdev_uc_empty(netdev)) { +#ifdef NETDEV_HW_ADDR_T_UNICAST + struct netdev_hw_addr *ha; +#else + struct dev_mc_list *ha; +#endif + netdev_for_each_uc_addr(ha, netdev) { +#ifdef NETDEV_HW_ADDR_T_UNICAST + ixgbe_del_mac_filter(adapter, ha->addr, (u16)vfn); + ixgbe_add_mac_filter(adapter, ha->addr, (u16)vfn); +#else + ixgbe_del_mac_filter(adapter, ha->da_addr, (u16)vfn); + ixgbe_add_mac_filter(adapter, ha->da_addr, (u16)vfn); +#endif + count++; + } + } + return count; +} + +#endif +/** + * ixgbe_set_rx_mode - Unicast, Multicast and Promiscuous mode set + * @netdev: network interface device structure + * + * The set_rx_method entry point is called whenever the unicast/multicast + * address list or the network interface flags are updated. This routine is + * responsible for configuring the hardware for proper unicast, multicast and + * promiscuous mode. + **/ +void ixgbe_set_rx_mode(struct net_device *netdev) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_hw *hw = &adapter->hw; + u32 fctrl, vmolr = IXGBE_VMOLR_BAM | IXGBE_VMOLR_AUPE; + u32 vlnctrl; + int count; + + /* Check for Promiscuous and All Multicast modes */ + fctrl = IXGBE_READ_REG(hw, IXGBE_FCTRL); + vlnctrl = IXGBE_READ_REG(hw, IXGBE_VLNCTRL); + + /* set all bits that we expect to always be set */ + fctrl |= IXGBE_FCTRL_BAM; + fctrl |= IXGBE_FCTRL_DPF; /* discard pause frames when FC enabled */ + fctrl |= IXGBE_FCTRL_PMCF; + + /* clear the bits we are changing the status of */ + fctrl &= ~(IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE); + vlnctrl &= ~(IXGBE_VLNCTRL_VFE | IXGBE_VLNCTRL_CFIEN); + + if (netdev->flags & IFF_PROMISC) { + hw->addr_ctrl.user_set_promisc = true; + fctrl |= (IXGBE_FCTRL_UPE | IXGBE_FCTRL_MPE); + vmolr |= IXGBE_VMOLR_MPE; + } else { + if (netdev->flags & IFF_ALLMULTI) { + fctrl |= IXGBE_FCTRL_MPE; + vmolr |= IXGBE_VMOLR_MPE; + } else { + /* + * Write addresses to the MTA, if the attempt fails + * then we should just turn on promiscuous mode so + * that we can at least receive multicast traffic + */ + count = ixgbe_write_mc_addr_list(netdev); + if (count < 0) { + fctrl |= IXGBE_FCTRL_MPE; + vmolr |= IXGBE_VMOLR_MPE; + } else if (count) { + vmolr |= IXGBE_VMOLR_ROMPE; + } + } +#ifdef NETIF_F_HW_VLAN_TX + /* enable hardware vlan filtering */ + vlnctrl |= IXGBE_VLNCTRL_VFE; +#endif + hw->addr_ctrl.user_set_promisc = false; +#ifdef HAVE_SET_RX_MODE + /* + * Write addresses to available RAR registers, if there is not + * sufficient space to store all the addresses then enable + * unicast promiscuous mode + */ + count = ixgbe_write_uc_addr_list(adapter, netdev, + adapter->num_vfs); + if (count < 0) { + fctrl |= IXGBE_FCTRL_UPE; + vmolr |= IXGBE_VMOLR_ROPE; + } +#endif + } + + if (hw->mac.type != ixgbe_mac_82598EB) { + vmolr |= IXGBE_READ_REG(hw, IXGBE_VMOLR(adapter->num_vfs)) & + ~(IXGBE_VMOLR_MPE | IXGBE_VMOLR_ROMPE | + IXGBE_VMOLR_ROPE); + IXGBE_WRITE_REG(hw, IXGBE_VMOLR(adapter->num_vfs), vmolr); + } + + IXGBE_WRITE_REG(hw, IXGBE_VLNCTRL, vlnctrl); + IXGBE_WRITE_REG(hw, IXGBE_FCTRL, fctrl); +} + + + + + + + + +/* Additional bittime to account for IXGBE framing */ +#define IXGBE_ETH_FRAMING 20 + +/* + * ixgbe_hpbthresh - calculate high water mark for flow control + * + * @adapter: board private structure to calculate for + * @pb - packet buffer to calculate + */ +static int ixgbe_hpbthresh(struct ixgbe_adapter *adapter, int pb) +{ + struct ixgbe_hw *hw = &adapter->hw; + struct net_device *dev = adapter->netdev; + int link, tc, kb, marker; + u32 dv_id, rx_pba; + + /* Calculate max LAN frame size */ + tc = link = dev->mtu + ETH_HLEN + ETH_FCS_LEN + IXGBE_ETH_FRAMING; + +#ifdef IXGBE_FCOE + /* FCoE traffic class uses FCOE jumbo frames */ + if (dev->features & NETIF_F_FCOE_MTU) { + int fcoe_pb = 0; + + fcoe_pb = netdev_get_prio_tc_map(dev, adapter->fcoe.up); + + if (fcoe_pb == pb && tc < IXGBE_FCOE_JUMBO_FRAME_SIZE) + tc = IXGBE_FCOE_JUMBO_FRAME_SIZE; + } +#endif + + /* Calculate delay value for device */ + switch (hw->mac.type) { + case ixgbe_mac_X540: + dv_id = IXGBE_DV_X540(link, tc); + break; + default: + dv_id = IXGBE_DV(link, tc); + break; + } + + /* Loopback switch introduces additional latency */ + if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) + dv_id += IXGBE_B2BT(tc); + + /* Delay value is calculated in bit times convert to KB */ + kb = IXGBE_BT2KB(dv_id); + rx_pba = IXGBE_READ_REG(hw, IXGBE_RXPBSIZE(pb)) >> 10; + + marker = rx_pba - kb; + + /* It is possible that the packet buffer is not large enough + * to provide required headroom. In this case throw an error + * to user and a do the best we can. + */ + if (marker < 0) { + e_warn(drv, "Packet Buffer(%i) can not provide enough" + "headroom to suppport flow control." + "Decrease MTU or number of traffic classes\n", pb); + marker = tc + 1; + } + + return marker; +} + +/* + * ixgbe_lpbthresh - calculate low water mark for for flow control + * + * @adapter: board private structure to calculate for + * @pb - packet buffer to calculate + */ +static int ixgbe_lpbthresh(struct ixgbe_adapter *adapter, int pb) +{ + struct ixgbe_hw *hw = &adapter->hw; + struct net_device *dev = adapter->netdev; + int tc; + u32 dv_id; + + /* Calculate max LAN frame size */ + tc = dev->mtu + ETH_HLEN + ETH_FCS_LEN; + +#ifdef IXGBE_FCOE + /* FCoE traffic class uses FCOE jumbo frames */ + if (dev->features & NETIF_F_FCOE_MTU) { + int fcoe_pb = 0; + + fcoe_pb = netdev_get_prio_tc_map(dev, adapter->fcoe.up); + + if (fcoe_pb == pb && tc < IXGBE_FCOE_JUMBO_FRAME_SIZE) + tc = IXGBE_FCOE_JUMBO_FRAME_SIZE; + } +#endif + + /* Calculate delay value for device */ + switch (hw->mac.type) { + case ixgbe_mac_X540: + dv_id = IXGBE_LOW_DV_X540(tc); + break; + default: + dv_id = IXGBE_LOW_DV(tc); + break; + } + + /* Delay value is calculated in bit times convert to KB */ + return IXGBE_BT2KB(dv_id); +} + +/* + * ixgbe_pbthresh_setup - calculate and setup high low water marks + */ +static void ixgbe_pbthresh_setup(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + int num_tc = netdev_get_num_tc(adapter->netdev); + int i; + + if (!num_tc) + num_tc = 1; + if (num_tc > IXGBE_DCB_MAX_TRAFFIC_CLASS) + num_tc = IXGBE_DCB_MAX_TRAFFIC_CLASS; + + for (i = 0; i < num_tc; i++) { + hw->fc.high_water[i] = ixgbe_hpbthresh(adapter, i); + hw->fc.low_water[i] = ixgbe_lpbthresh(adapter, i); + + /* Low water marks must not be larger than high water marks */ + if (hw->fc.low_water[i] > hw->fc.high_water[i]) + hw->fc.low_water[i] = 0; + } + + for (; i < IXGBE_DCB_MAX_TRAFFIC_CLASS; i++) + hw->fc.high_water[i] = 0; +} + + + +#ifdef NO_VNIC +static void ixgbe_configure(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + + ixgbe_configure_pb(adapter); + ixgbe_configure_dcb(adapter); + + ixgbe_set_rx_mode(adapter->netdev); +#ifdef NETIF_F_HW_VLAN_TX + ixgbe_restore_vlan(adapter); +#endif + +#ifdef IXGBE_FCOE + if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) + ixgbe_configure_fcoe(adapter); + +#endif /* IXGBE_FCOE */ + + if (adapter->hw.mac.type != ixgbe_mac_82598EB) + hw->mac.ops.disable_sec_rx_path(hw); + + if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) { + ixgbe_init_fdir_signature_82599(&adapter->hw, + adapter->fdir_pballoc); + } else if (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE) { + ixgbe_init_fdir_perfect_82599(&adapter->hw, + adapter->fdir_pballoc); + ixgbe_fdir_filter_restore(adapter); + } + + if (adapter->hw.mac.type != ixgbe_mac_82598EB) + hw->mac.ops.enable_sec_rx_path(hw); + + ixgbe_configure_virtualization(adapter); + + ixgbe_configure_tx(adapter); + ixgbe_configure_rx(adapter); +} +#endif + +static bool ixgbe_is_sfp(struct ixgbe_hw *hw) +{ + switch (hw->phy.type) { + case ixgbe_phy_sfp_avago: + case ixgbe_phy_sfp_ftl: + case ixgbe_phy_sfp_intel: + case ixgbe_phy_sfp_unknown: + case ixgbe_phy_sfp_passive_tyco: + case ixgbe_phy_sfp_passive_unknown: + case ixgbe_phy_sfp_active_unknown: + case ixgbe_phy_sfp_ftl_active: + return true; + case ixgbe_phy_nl: + if (hw->mac.type == ixgbe_mac_82598EB) + return true; + default: + return false; + } +} + + +/** + * ixgbe_clear_vf_stats_counters - Clear out VF stats after reset + * @adapter: board private structure + * + * On a reset we need to clear out the VF stats or accounting gets + * messed up because they're not clear on read. + **/ +void ixgbe_clear_vf_stats_counters(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + int i; + + for (i = 0; i < adapter->num_vfs; i++) { + adapter->vfinfo[i].last_vfstats.gprc = + IXGBE_READ_REG(hw, IXGBE_PVFGPRC(i)); + adapter->vfinfo[i].saved_rst_vfstats.gprc += + adapter->vfinfo[i].vfstats.gprc; + adapter->vfinfo[i].vfstats.gprc = 0; + adapter->vfinfo[i].last_vfstats.gptc = + IXGBE_READ_REG(hw, IXGBE_PVFGPTC(i)); + adapter->vfinfo[i].saved_rst_vfstats.gptc += + adapter->vfinfo[i].vfstats.gptc; + adapter->vfinfo[i].vfstats.gptc = 0; + adapter->vfinfo[i].last_vfstats.gorc = + IXGBE_READ_REG(hw, IXGBE_PVFGORC_LSB(i)); + adapter->vfinfo[i].saved_rst_vfstats.gorc += + adapter->vfinfo[i].vfstats.gorc; + adapter->vfinfo[i].vfstats.gorc = 0; + adapter->vfinfo[i].last_vfstats.gotc = + IXGBE_READ_REG(hw, IXGBE_PVFGOTC_LSB(i)); + adapter->vfinfo[i].saved_rst_vfstats.gotc += + adapter->vfinfo[i].vfstats.gotc; + adapter->vfinfo[i].vfstats.gotc = 0; + adapter->vfinfo[i].last_vfstats.mprc = + IXGBE_READ_REG(hw, IXGBE_PVFMPRC(i)); + adapter->vfinfo[i].saved_rst_vfstats.mprc += + adapter->vfinfo[i].vfstats.mprc; + adapter->vfinfo[i].vfstats.mprc = 0; + } +} + + + +void ixgbe_reinit_locked(struct ixgbe_adapter *adapter) +{ +#ifdef NO_VNIC + WARN_ON(in_interrupt()); + /* put off any impending NetWatchDogTimeout */ + adapter->netdev->trans_start = jiffies; + + while (test_and_set_bit(__IXGBE_RESETTING, &adapter->state)) + usleep_range(1000, 2000); + ixgbe_down(adapter); + /* + * If SR-IOV enabled then wait a bit before bringing the adapter + * back up to give the VFs time to respond to the reset. The + * two second wait is based upon the watchdog timer cycle in + * the VF driver. + */ + if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) + msleep(2000); + ixgbe_up(adapter); + clear_bit(__IXGBE_RESETTING, &adapter->state); +#endif +} + +void ixgbe_up(struct ixgbe_adapter *adapter) +{ + /* hardware has been reset, we need to reload some things */ + //ixgbe_configure(adapter); + + //ixgbe_up_complete(adapter); +} + +void ixgbe_reset(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + struct net_device *netdev = adapter->netdev; + int err; + + /* lock SFP init bit to prevent race conditions with the watchdog */ + while (test_and_set_bit(__IXGBE_IN_SFP_INIT, &adapter->state)) + usleep_range(1000, 2000); + + /* clear all SFP and link config related flags while holding SFP_INIT */ + adapter->flags2 &= ~(IXGBE_FLAG2_SEARCH_FOR_SFP | + IXGBE_FLAG2_SFP_NEEDS_RESET); + adapter->flags &= ~IXGBE_FLAG_NEED_LINK_CONFIG; + + err = hw->mac.ops.init_hw(hw); + switch (err) { + case 0: + case IXGBE_ERR_SFP_NOT_PRESENT: + case IXGBE_ERR_SFP_NOT_SUPPORTED: + break; + case IXGBE_ERR_MASTER_REQUESTS_PENDING: + e_dev_err("master disable timed out\n"); + break; + case IXGBE_ERR_EEPROM_VERSION: + /* We are running on a pre-production device, log a warning */ + e_dev_warn("This device is a pre-production adapter/LOM. " + "Please be aware there may be issues associated " + "with your hardware. If you are experiencing " + "problems please contact your Intel or hardware " + "representative who provided you with this " + "hardware.\n"); + break; + default: + e_dev_err("Hardware Error: %d\n", err); + } + + clear_bit(__IXGBE_IN_SFP_INIT, &adapter->state); + + ixgbe_flush_sw_mac_table(adapter); + memcpy(&adapter->mac_table[0].addr, hw->mac.perm_addr, + netdev->addr_len); + adapter->mac_table[0].queue = adapter->num_vfs; + adapter->mac_table[0].state = (IXGBE_MAC_STATE_DEFAULT | + IXGBE_MAC_STATE_IN_USE); + hw->mac.ops.set_rar(hw, 0, adapter->mac_table[0].addr, + adapter->mac_table[0].queue, + IXGBE_RAH_AV); +} + + + + + + +void ixgbe_down(struct ixgbe_adapter *adapter) +{ +#ifdef NO_VNIC + struct net_device *netdev = adapter->netdev; + struct ixgbe_hw *hw = &adapter->hw; + u32 rxctrl; + int i; + + /* signal that we are down to the interrupt handler */ + set_bit(__IXGBE_DOWN, &adapter->state); + + /* disable receives */ + rxctrl = IXGBE_READ_REG(hw, IXGBE_RXCTRL); + IXGBE_WRITE_REG(hw, IXGBE_RXCTRL, rxctrl & ~IXGBE_RXCTRL_RXEN); + + /* disable all enabled rx queues */ + for (i = 0; i < adapter->num_rx_queues; i++) + /* this call also flushes the previous write */ + ixgbe_disable_rx_queue(adapter, adapter->rx_ring[i]); + + usleep_range(10000, 20000); + + netif_tx_stop_all_queues(netdev); + + /* call carrier off first to avoid false dev_watchdog timeouts */ + netif_carrier_off(netdev); + netif_tx_disable(netdev); + + ixgbe_irq_disable(adapter); + + ixgbe_napi_disable_all(adapter); + + adapter->flags2 &= ~(IXGBE_FLAG2_FDIR_REQUIRES_REINIT | + IXGBE_FLAG2_RESET_REQUESTED); + adapter->flags &= ~IXGBE_FLAG_NEED_LINK_UPDATE; + + del_timer_sync(&adapter->service_timer); + + if (adapter->num_vfs) { + /* Clear EITR Select mapping */ + IXGBE_WRITE_REG(&adapter->hw, IXGBE_EITRSEL, 0); + + /* Mark all the VFs as inactive */ + for (i = 0 ; i < adapter->num_vfs; i++) + adapter->vfinfo[i].clear_to_send = 0; + + /* ping all the active vfs to let them know we are going down */ + ixgbe_ping_all_vfs(adapter); + + /* Disable all VFTE/VFRE TX/RX */ + ixgbe_disable_tx_rx(adapter); + } + + /* disable transmits in the hardware now that interrupts are off */ + for (i = 0; i < adapter->num_tx_queues; i++) { + u8 reg_idx = adapter->tx_ring[i]->reg_idx; + IXGBE_WRITE_REG(hw, IXGBE_TXDCTL(reg_idx), IXGBE_TXDCTL_SWFLSH); + } + + /* Disable the Tx DMA engine on 82599 and X540 */ + switch (hw->mac.type) { + case ixgbe_mac_82599EB: + case ixgbe_mac_X540: + IXGBE_WRITE_REG(hw, IXGBE_DMATXCTL, + (IXGBE_READ_REG(hw, IXGBE_DMATXCTL) & + ~IXGBE_DMATXCTL_TE)); + break; + default: + break; + } + +#ifdef HAVE_PCI_ERS + if (!pci_channel_offline(adapter->pdev)) +#endif + ixgbe_reset(adapter); + /* power down the optics */ + if ((hw->phy.multispeed_fiber) || + ((hw->mac.ops.get_media_type(hw) == ixgbe_media_type_fiber) && + (hw->mac.type == ixgbe_mac_82599EB))) + ixgbe_disable_tx_laser(hw); + + ixgbe_clean_all_tx_rings(adapter); + ixgbe_clean_all_rx_rings(adapter); + +#if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE) + /* since we reset the hardware DCA settings were cleared */ + ixgbe_setup_dca(adapter); +#endif + +#endif /* NO_VNIC */ +} + +#ifndef NO_VNIC + +#undef IXGBE_FCOE + +/* Artificial max queue cap per traffic class in DCB mode */ +#define DCB_QUEUE_CAP 8 + +/** + * ixgbe_set_dcb_queues: Allocate queues for a DCB-enabled device + * @adapter: board private structure to initialize + * + * When DCB (Data Center Bridging) is enabled, allocate queues for + * each traffic class. If multiqueue isn't available,then abort DCB + * initialization. + * + * This function handles all combinations of DCB, RSS, and FCoE. + * + **/ +static bool ixgbe_set_dcb_queues(struct ixgbe_adapter *adapter) +{ + int tcs; +#ifdef HAVE_MQPRIO + int rss_i, i, offset = 0; + struct net_device *dev = adapter->netdev; + + /* Map queue offset and counts onto allocated tx queues */ + tcs = netdev_get_num_tc(dev); + + if (!tcs) + return false; + + rss_i = min_t(int, dev->num_tx_queues / tcs, num_online_cpus()); + + if (rss_i > DCB_QUEUE_CAP) + rss_i = DCB_QUEUE_CAP; + + for (i = 0; i < tcs; i++) { + netdev_set_tc_queue(dev, i, rss_i, offset); + offset += rss_i; + } + + adapter->num_tx_queues = rss_i * tcs; + adapter->num_rx_queues = rss_i * tcs; + +#ifdef IXGBE_FCOE + /* FCoE enabled queues require special configuration indexed + * by feature specific indices and mask. Here we map FCoE + * indices onto the DCB queue pairs allowing FCoE to own + * configuration later. + */ + + if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) { + struct ixgbe_ring_feature *f; + int tc; + u8 prio_tc[IXGBE_DCB_MAX_USER_PRIORITY] = {0}; + + ixgbe_dcb_unpack_map_cee(&adapter->dcb_cfg, + IXGBE_DCB_TX_CONFIG, + prio_tc); + tc = prio_tc[adapter->fcoe.up]; + + f = &adapter->ring_feature[RING_F_FCOE]; + f->indices = min_t(int, rss_i, f->indices); + f->mask = rss_i * tc; + } +#endif /* IXGBE_FCOE */ +#else + if (!(adapter->flags & IXGBE_FLAG_DCB_ENABLED)) + return false; + + /* Enable one Queue per traffic class */ + tcs = adapter->tc; + if (!tcs) + return false; + +#ifdef IXGBE_FCOE + if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) { + struct ixgbe_ring_feature *f; + int tc = netdev_get_prio_tc_map(adapter->netdev, + adapter->fcoe.up); + + f = &adapter->ring_feature[RING_F_FCOE]; + + /* + * We have max 8 queues for FCoE, where 8 the is + * FCoE redirection table size. We must also share + * ring resources with network traffic so if FCoE TC is + * 4 or greater and we are in 8 TC mode we can only use + * 7 queues. + */ + if ((tcs > 4) && (tc >= 4) && (f->indices > 7)) + f->indices = 7; + + f->indices = min_t(int, num_online_cpus(), f->indices); + f->mask = tcs; + + adapter->num_rx_queues = f->indices + tcs; + adapter->num_tx_queues = f->indices + tcs; + + return true; + } + +#endif /* IXGBE_FCOE */ + adapter->num_rx_queues = tcs; + adapter->num_tx_queues = tcs; +#endif /* HAVE_MQ */ + + return true; +} + +/** + * ixgbe_set_vmdq_queues: Allocate queues for VMDq devices + * @adapter: board private structure to initialize + * + * When VMDq (Virtual Machine Devices queue) is enabled, allocate queues + * and VM pools where appropriate. If RSS is available, then also try and + * enable RSS and map accordingly. + * + **/ +static bool ixgbe_set_vmdq_queues(struct ixgbe_adapter *adapter) +{ + int vmdq_i = adapter->ring_feature[RING_F_VMDQ].indices; + int vmdq_m = 0; + int rss_i = adapter->ring_feature[RING_F_RSS].indices; + unsigned long i; + int rss_shift; + bool ret = false; + + + switch (adapter->flags & (IXGBE_FLAG_RSS_ENABLED + | IXGBE_FLAG_DCB_ENABLED + | IXGBE_FLAG_VMDQ_ENABLED)) { + + case (IXGBE_FLAG_RSS_ENABLED | IXGBE_FLAG_VMDQ_ENABLED): + switch (adapter->hw.mac.type) { + case ixgbe_mac_82599EB: + case ixgbe_mac_X540: + vmdq_i = min((int)IXGBE_MAX_VMDQ_INDICES, vmdq_i); + if (vmdq_i > 32) + rss_i = 2; + else + rss_i = 4; + i = rss_i; + rss_shift = find_first_bit(&i, sizeof(i) * 8); + vmdq_m = ((IXGBE_MAX_VMDQ_INDICES - 1) << + rss_shift) & (MAX_RX_QUEUES - 1); + break; + default: + break; + } + adapter->num_rx_queues = vmdq_i * rss_i; + adapter->num_tx_queues = min((int)MAX_TX_QUEUES, vmdq_i * rss_i); + ret = true; + break; + + case (IXGBE_FLAG_VMDQ_ENABLED): + switch (adapter->hw.mac.type) { + case ixgbe_mac_82598EB: + vmdq_m = (IXGBE_MAX_VMDQ_INDICES - 1); + break; + case ixgbe_mac_82599EB: + case ixgbe_mac_X540: + vmdq_m = (IXGBE_MAX_VMDQ_INDICES - 1) << 1; + break; + default: + break; + } + adapter->num_rx_queues = vmdq_i; + adapter->num_tx_queues = vmdq_i; + ret = true; + break; + + default: + ret = false; + goto vmdq_queues_out; + } + + if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) { + adapter->num_rx_pools = vmdq_i; + adapter->num_rx_queues_per_pool = adapter->num_rx_queues / + vmdq_i; + } else { + adapter->num_rx_pools = adapter->num_rx_queues; + adapter->num_rx_queues_per_pool = 1; + } + /* save the mask for later use */ + adapter->ring_feature[RING_F_VMDQ].mask = vmdq_m; +vmdq_queues_out: + return ret; +} + +/** + * ixgbe_set_rss_queues: Allocate queues for RSS + * @adapter: board private structure to initialize + * + * This is our "base" multiqueue mode. RSS (Receive Side Scaling) will try + * to allocate one Rx queue per CPU, and if available, one Tx queue per CPU. + * + **/ +static bool ixgbe_set_rss_queues(struct ixgbe_adapter *adapter) +{ + struct ixgbe_ring_feature *f; + + if (!(adapter->flags & IXGBE_FLAG_RSS_ENABLED)) { + adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE; + return false; + } + + /* set mask for 16 queue limit of RSS */ + f = &adapter->ring_feature[RING_F_RSS]; + f->mask = 0xF; + + /* + * Use Flow Director in addition to RSS to ensure the best + * distribution of flows across cores, even when an FDIR flow + * isn't matched. + */ + if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) { + f = &adapter->ring_feature[RING_F_FDIR]; + + f->indices = min_t(int, num_online_cpus(), f->indices); + f->mask = 0; + } + + adapter->num_rx_queues = f->indices; +#ifdef HAVE_TX_MQ + adapter->num_tx_queues = f->indices; +#endif + + return true; +} + +#ifdef IXGBE_FCOE +/** + * ixgbe_set_fcoe_queues: Allocate queues for Fiber Channel over Ethernet (FCoE) + * @adapter: board private structure to initialize + * + * FCoE RX FCRETA can use up to 8 rx queues for up to 8 different exchanges. + * The ring feature mask is not used as a mask for FCoE, as it can take any 8 + * rx queues out of the max number of rx queues, instead, it is used as the + * index of the first rx queue used by FCoE. + * + **/ +static bool ixgbe_set_fcoe_queues(struct ixgbe_adapter *adapter) +{ + struct ixgbe_ring_feature *f; + + if (!(adapter->flags & IXGBE_FLAG_FCOE_ENABLED)) + return false; + + ixgbe_set_rss_queues(adapter); + + f = &adapter->ring_feature[RING_F_FCOE]; + f->indices = min_t(int, num_online_cpus(), f->indices); + + /* adding FCoE queues */ + f->mask = adapter->num_rx_queues; + adapter->num_rx_queues += f->indices; + adapter->num_tx_queues += f->indices; + + return true; +} + +#endif /* IXGBE_FCOE */ +/* + * ixgbe_set_num_queues: Allocate queues for device, feature dependent + * @adapter: board private structure to initialize + * + * This is the top level queue allocation routine. The order here is very + * important, starting with the "most" number of features turned on at once, + * and ending with the smallest set of features. This way large combinations + * can be allocated if they're turned on, and smaller combinations are the + * fallthrough conditions. + * + **/ +static void ixgbe_set_num_queues(struct ixgbe_adapter *adapter) +{ + /* Start with base case */ + adapter->num_rx_queues = 1; + adapter->num_tx_queues = 1; + adapter->num_rx_pools = adapter->num_rx_queues; + adapter->num_rx_queues_per_pool = 1; + + if (ixgbe_set_vmdq_queues(adapter)) + return; + + if (ixgbe_set_dcb_queues(adapter)) + return; + +#ifdef IXGBE_FCOE + if (ixgbe_set_fcoe_queues(adapter)) + return; + +#endif /* IXGBE_FCOE */ + ixgbe_set_rss_queues(adapter); +} + +#endif + + +/** + * ixgbe_sw_init - Initialize general software structures (struct ixgbe_adapter) + * @adapter: board private structure to initialize + * + * ixgbe_sw_init initializes the Adapter private data structure. + * Fields are initialized based on PCI device information and + * OS network device settings (MTU size). + **/ +static int ixgbe_sw_init(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + struct pci_dev *pdev = adapter->pdev; + int err; + + /* PCI config space info */ + + hw->vendor_id = pdev->vendor; + hw->device_id = pdev->device; + pci_read_config_byte(pdev, PCI_REVISION_ID, &hw->revision_id); + hw->subsystem_vendor_id = pdev->subsystem_vendor; + hw->subsystem_device_id = pdev->subsystem_device; + + err = ixgbe_init_shared_code(hw); + if (err) { + e_err(probe, "init_shared_code failed: %d\n", err); + goto out; + } + adapter->mac_table = kzalloc(sizeof(struct ixgbe_mac_addr) * + hw->mac.num_rar_entries, + GFP_ATOMIC); + /* Set capability flags */ + switch (hw->mac.type) { + case ixgbe_mac_82598EB: + adapter->flags |= IXGBE_FLAG_MSI_CAPABLE | + IXGBE_FLAG_MSIX_CAPABLE | + IXGBE_FLAG_MQ_CAPABLE | + IXGBE_FLAG_RSS_CAPABLE; + adapter->flags |= IXGBE_FLAG_DCB_CAPABLE; +#if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE) + adapter->flags |= IXGBE_FLAG_DCA_CAPABLE; +#endif + adapter->flags &= ~IXGBE_FLAG_SRIOV_CAPABLE; + adapter->flags2 &= ~IXGBE_FLAG2_RSC_CAPABLE; + + if (hw->device_id == IXGBE_DEV_ID_82598AT) + adapter->flags |= IXGBE_FLAG_FAN_FAIL_CAPABLE; + + adapter->max_msix_q_vectors = IXGBE_MAX_MSIX_Q_VECTORS_82598; + break; + case ixgbe_mac_X540: + adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_CAPABLE; + case ixgbe_mac_82599EB: + adapter->flags |= IXGBE_FLAG_MSI_CAPABLE | + IXGBE_FLAG_MSIX_CAPABLE | + IXGBE_FLAG_MQ_CAPABLE | + IXGBE_FLAG_RSS_CAPABLE; + adapter->flags |= IXGBE_FLAG_DCB_CAPABLE; +#if defined(CONFIG_DCA) || defined(CONFIG_DCA_MODULE) + adapter->flags |= IXGBE_FLAG_DCA_CAPABLE; +#endif + adapter->flags |= IXGBE_FLAG_SRIOV_CAPABLE; + adapter->flags2 |= IXGBE_FLAG2_RSC_CAPABLE; +#ifdef IXGBE_FCOE + adapter->flags |= IXGBE_FLAG_FCOE_CAPABLE; + adapter->flags &= ~IXGBE_FLAG_FCOE_ENABLED; + adapter->ring_feature[RING_F_FCOE].indices = 0; +#ifdef CONFIG_DCB + /* Default traffic class to use for FCoE */ + adapter->fcoe.tc = IXGBE_FCOE_DEFTC; + adapter->fcoe.up = IXGBE_FCOE_DEFTC; + adapter->fcoe.up_set = IXGBE_FCOE_DEFTC; +#endif +#endif + if (hw->device_id == IXGBE_DEV_ID_82599_T3_LOM) + adapter->flags2 |= IXGBE_FLAG2_TEMP_SENSOR_CAPABLE; +#ifndef IXGBE_NO_SMART_SPEED + hw->phy.smart_speed = ixgbe_smart_speed_on; +#else + hw->phy.smart_speed = ixgbe_smart_speed_off; +#endif + adapter->max_msix_q_vectors = IXGBE_MAX_MSIX_Q_VECTORS_82599; + default: + break; + } + + /* n-tuple support exists, always init our spinlock */ + //spin_lock_init(&adapter->fdir_perfect_lock); + + if (adapter->flags & IXGBE_FLAG_DCB_CAPABLE) { + int j; + struct ixgbe_dcb_tc_config *tc; + int dcb_i = IXGBE_DCB_MAX_TRAFFIC_CLASS; + + + adapter->dcb_cfg.num_tcs.pg_tcs = dcb_i; + adapter->dcb_cfg.num_tcs.pfc_tcs = dcb_i; + for (j = 0; j < dcb_i; j++) { + tc = &adapter->dcb_cfg.tc_config[j]; + tc->path[IXGBE_DCB_TX_CONFIG].bwg_id = 0; + tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent = 100 / dcb_i; + tc->path[IXGBE_DCB_RX_CONFIG].bwg_id = 0; + tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent = 100 / dcb_i; + tc->pfc = ixgbe_dcb_pfc_disabled; + if (j == 0) { + /* total of all TCs bandwidth needs to be 100 */ + tc->path[IXGBE_DCB_TX_CONFIG].bwg_percent += + 100 % dcb_i; + tc->path[IXGBE_DCB_RX_CONFIG].bwg_percent += + 100 % dcb_i; + } + } + + /* Initialize default user to priority mapping, UPx->TC0 */ + tc = &adapter->dcb_cfg.tc_config[0]; + tc->path[IXGBE_DCB_TX_CONFIG].up_to_tc_bitmap = 0xFF; + tc->path[IXGBE_DCB_RX_CONFIG].up_to_tc_bitmap = 0xFF; + + adapter->dcb_cfg.bw_percentage[IXGBE_DCB_TX_CONFIG][0] = 100; + adapter->dcb_cfg.bw_percentage[IXGBE_DCB_RX_CONFIG][0] = 100; + adapter->dcb_cfg.rx_pba_cfg = ixgbe_dcb_pba_equal; + adapter->dcb_cfg.pfc_mode_enable = false; + adapter->dcb_cfg.round_robin_enable = false; + adapter->dcb_set_bitmap = 0x00; +#ifdef CONFIG_DCB + adapter->dcbx_cap = DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_CEE; +#endif /* CONFIG_DCB */ + + if (hw->mac.type == ixgbe_mac_X540) { + adapter->dcb_cfg.num_tcs.pg_tcs = 4; + adapter->dcb_cfg.num_tcs.pfc_tcs = 4; + } + } +#ifdef CONFIG_DCB + /* XXX does this need to be initialized even w/o DCB? */ + //memcpy(&adapter->temp_dcb_cfg, &adapter->dcb_cfg, + // sizeof(adapter->temp_dcb_cfg)); + +#endif + //if (hw->mac.type == ixgbe_mac_82599EB || + // hw->mac.type == ixgbe_mac_X540) + // hw->mbx.ops.init_params(hw); + + /* default flow control settings */ + hw->fc.requested_mode = ixgbe_fc_full; + hw->fc.current_mode = ixgbe_fc_full; /* init for ethtool output */ + + adapter->last_lfc_mode = hw->fc.current_mode; + ixgbe_pbthresh_setup(adapter); + hw->fc.pause_time = IXGBE_DEFAULT_FCPAUSE; + hw->fc.send_xon = true; + hw->fc.disable_fc_autoneg = false; + + /* set default ring sizes */ + adapter->tx_ring_count = IXGBE_DEFAULT_TXD; + adapter->rx_ring_count = IXGBE_DEFAULT_RXD; + + /* set default work limits */ + adapter->tx_work_limit = IXGBE_DEFAULT_TX_WORK; + adapter->rx_work_limit = IXGBE_DEFAULT_RX_WORK; + + set_bit(__IXGBE_DOWN, &adapter->state); +out: + return err; +} + +/** + * ixgbe_setup_tx_resources - allocate Tx resources (Descriptors) + * @tx_ring: tx descriptor ring (for a specific queue) to setup + * + * Return 0 on success, negative on failure + **/ +int ixgbe_setup_tx_resources(struct ixgbe_ring *tx_ring) +{ + struct device *dev = tx_ring->dev; + //int orig_node = dev_to_node(dev); + int numa_node = -1; + int size; + + size = sizeof(struct ixgbe_tx_buffer) * tx_ring->count; + + if (tx_ring->q_vector) + numa_node = tx_ring->q_vector->numa_node; + + tx_ring->tx_buffer_info = vzalloc_node(size, numa_node); + if (!tx_ring->tx_buffer_info) + tx_ring->tx_buffer_info = vzalloc(size); + if (!tx_ring->tx_buffer_info) + goto err; + + /* round up to nearest 4K */ + tx_ring->size = tx_ring->count * sizeof(union ixgbe_adv_tx_desc); + tx_ring->size = ALIGN(tx_ring->size, 4096); + + //set_dev_node(dev, numa_node); + //tx_ring->desc = dma_alloc_coherent(dev, + // tx_ring->size, + // &tx_ring->dma, + // GFP_KERNEL); + //set_dev_node(dev, orig_node); + //if (!tx_ring->desc) + // tx_ring->desc = dma_alloc_coherent(dev, tx_ring->size, + // &tx_ring->dma, GFP_KERNEL); + //if (!tx_ring->desc) + // goto err; + + return 0; + +err: + vfree(tx_ring->tx_buffer_info); + tx_ring->tx_buffer_info = NULL; + dev_err(dev, "Unable to allocate memory for the Tx descriptor ring\n"); + return -ENOMEM; +} + +/** + * ixgbe_setup_all_tx_resources - allocate all queues Tx resources + * @adapter: board private structure + * + * If this function returns with an error, then it's possible one or + * more of the rings is populated (while the rest are not). It is the + * callers duty to clean those orphaned rings. + * + * Return 0 on success, negative on failure + **/ +static int ixgbe_setup_all_tx_resources(struct ixgbe_adapter *adapter) +{ + int i, err = 0; + + for (i = 0; i < adapter->num_tx_queues; i++) { + err = ixgbe_setup_tx_resources(adapter->tx_ring[i]); + if (!err) + continue; + e_err(probe, "Allocation for Tx Queue %u failed\n", i); + break; + } + + return err; +} + +/** + * ixgbe_setup_rx_resources - allocate Rx resources (Descriptors) + * @rx_ring: rx descriptor ring (for a specific queue) to setup + * + * Returns 0 on success, negative on failure + **/ +int ixgbe_setup_rx_resources(struct ixgbe_ring *rx_ring) +{ + struct device *dev = rx_ring->dev; + //int orig_node = dev_to_node(dev); + int numa_node = -1; + int size; + + size = sizeof(struct ixgbe_rx_buffer) * rx_ring->count; + + if (rx_ring->q_vector) + numa_node = rx_ring->q_vector->numa_node; + + rx_ring->rx_buffer_info = vzalloc_node(size, numa_node); + if (!rx_ring->rx_buffer_info) + rx_ring->rx_buffer_info = vzalloc(size); + if (!rx_ring->rx_buffer_info) + goto err; + + /* Round up to nearest 4K */ + rx_ring->size = rx_ring->count * sizeof(union ixgbe_adv_rx_desc); + rx_ring->size = ALIGN(rx_ring->size, 4096); + +#ifdef NO_VNIC + set_dev_node(dev, numa_node); + rx_ring->desc = dma_alloc_coherent(dev, + rx_ring->size, + &rx_ring->dma, + GFP_KERNEL); + set_dev_node(dev, orig_node); + if (!rx_ring->desc) + rx_ring->desc = dma_alloc_coherent(dev, rx_ring->size, + &rx_ring->dma, GFP_KERNEL); + if (!rx_ring->desc) + goto err; + +#ifndef CONFIG_IXGBE_DISABLE_PACKET_SPLIT + ixgbe_init_rx_page_offset(rx_ring); + +#endif + +#endif /* NO_VNIC */ + return 0; +err: + vfree(rx_ring->rx_buffer_info); + rx_ring->rx_buffer_info = NULL; + dev_err(dev, "Unable to allocate memory for the Rx descriptor ring\n"); + return -ENOMEM; +} + +/** + * ixgbe_setup_all_rx_resources - allocate all queues Rx resources + * @adapter: board private structure + * + * If this function returns with an error, then it's possible one or + * more of the rings is populated (while the rest are not). It is the + * callers duty to clean those orphaned rings. + * + * Return 0 on success, negative on failure + **/ +static int ixgbe_setup_all_rx_resources(struct ixgbe_adapter *adapter) +{ + int i, err = 0; + + for (i = 0; i < adapter->num_rx_queues; i++) { + err = ixgbe_setup_rx_resources(adapter->rx_ring[i]); + if (!err) + continue; + e_err(probe, "Allocation for Rx Queue %u failed\n", i); + break; + } + + return err; +} + +/** + * ixgbe_free_tx_resources - Free Tx Resources per Queue + * @tx_ring: Tx descriptor ring for a specific queue + * + * Free all transmit software resources + **/ +void ixgbe_free_tx_resources(struct ixgbe_ring *tx_ring) +{ + //ixgbe_clean_tx_ring(tx_ring); + + vfree(tx_ring->tx_buffer_info); + tx_ring->tx_buffer_info = NULL; + + /* if not set, then don't free */ + if (!tx_ring->desc) + return; + + //dma_free_coherent(tx_ring->dev, tx_ring->size, + // tx_ring->desc, tx_ring->dma); + + tx_ring->desc = NULL; +} + +/** + * ixgbe_free_all_tx_resources - Free Tx Resources for All Queues + * @adapter: board private structure + * + * Free all transmit software resources + **/ +static void ixgbe_free_all_tx_resources(struct ixgbe_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_tx_queues; i++) + if (adapter->tx_ring[i]->desc) + ixgbe_free_tx_resources(adapter->tx_ring[i]); +} + +/** + * ixgbe_free_rx_resources - Free Rx Resources + * @rx_ring: ring to clean the resources from + * + * Free all receive software resources + **/ +void ixgbe_free_rx_resources(struct ixgbe_ring *rx_ring) +{ + //ixgbe_clean_rx_ring(rx_ring); + + vfree(rx_ring->rx_buffer_info); + rx_ring->rx_buffer_info = NULL; + + /* if not set, then don't free */ + if (!rx_ring->desc) + return; + + //dma_free_coherent(rx_ring->dev, rx_ring->size, + // rx_ring->desc, rx_ring->dma); + + rx_ring->desc = NULL; +} + +/** + * ixgbe_free_all_rx_resources - Free Rx Resources for All Queues + * @adapter: board private structure + * + * Free all receive software resources + **/ +static void ixgbe_free_all_rx_resources(struct ixgbe_adapter *adapter) +{ + int i; + + for (i = 0; i < adapter->num_rx_queues; i++) + if (adapter->rx_ring[i]->desc) + ixgbe_free_rx_resources(adapter->rx_ring[i]); +} + + +/** + * ixgbe_open - Called when a network interface is made active + * @netdev: network interface device structure + * + * Returns 0 on success, negative value on failure + * + * The open entry point is called when a network interface is made + * active by the system (IFF_UP). At this point all resources needed + * for transmit and receive operations are allocated, the interrupt + * handler is registered with the OS, the watchdog timer is started, + * and the stack is notified that the interface is ready. + **/ +//static +int ixgbe_open(struct net_device *netdev) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + int err; + + /* disallow open during test */ + if (test_bit(__IXGBE_TESTING, &adapter->state)) + return -EBUSY; + + netif_carrier_off(netdev); + + /* allocate transmit descriptors */ + err = ixgbe_setup_all_tx_resources(adapter); + if (err) + goto err_setup_tx; + + /* allocate receive descriptors */ + err = ixgbe_setup_all_rx_resources(adapter); + if (err) + goto err_setup_rx; + +#ifdef NO_VNIC + ixgbe_configure(adapter); + + err = ixgbe_request_irq(adapter); + if (err) + goto err_req_irq; + + ixgbe_up_complete(adapter); + +err_req_irq: +#else + return 0; +#endif +err_setup_rx: + ixgbe_free_all_rx_resources(adapter); +err_setup_tx: + ixgbe_free_all_tx_resources(adapter); + ixgbe_reset(adapter); + + return err; +} + +/** + * ixgbe_close - Disables a network interface + * @netdev: network interface device structure + * + * Returns 0, this is not allowed to fail + * + * The close entry point is called when an interface is de-activated + * by the OS. The hardware is still under the drivers control, but + * needs to be disabled. A global MAC reset is issued to stop the + * hardware, and all transmit and receive resources are freed. + **/ +//static +int ixgbe_close(struct net_device *netdev) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + //ixgbe_down(adapter); + //ixgbe_free_irq(adapter); + + //ixgbe_fdir_filter_exit(adapter); + + //ixgbe_free_all_tx_resources(adapter); + //ixgbe_free_all_rx_resources(adapter); + + ixgbe_release_hw_control(adapter); + + return 0; +} + + + + + +/** + * ixgbe_get_stats - Get System Network Statistics + * @netdev: network interface device structure + * + * Returns the address of the device statistics structure. + * The statistics are actually updated from the timer callback. + **/ +//static +struct net_device_stats *ixgbe_get_stats(struct net_device *netdev) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + /* update the stats data */ + ixgbe_update_stats(adapter); + +#ifdef HAVE_NETDEV_STATS_IN_NETDEV + /* only return the current stats */ + return &netdev->stats; +#else + /* only return the current stats */ + return &adapter->net_stats; +#endif /* HAVE_NETDEV_STATS_IN_NETDEV */ +} + +/** + * ixgbe_update_stats - Update the board statistics counters. + * @adapter: board private structure + **/ +void ixgbe_update_stats(struct ixgbe_adapter *adapter) +{ +#ifdef HAVE_NETDEV_STATS_IN_NETDEV + struct net_device_stats *net_stats = &adapter->netdev->stats; +#else + struct net_device_stats *net_stats = &adapter->net_stats; +#endif /* HAVE_NETDEV_STATS_IN_NETDEV */ + struct ixgbe_hw *hw = &adapter->hw; + struct ixgbe_hw_stats *hwstats = &adapter->stats; + u64 total_mpc = 0; + u32 i, missed_rx = 0, mpc, bprc, lxon, lxoff, xon_off_tot; + u64 non_eop_descs = 0, restart_queue = 0, tx_busy = 0; + u64 alloc_rx_page_failed = 0, alloc_rx_buff_failed = 0; + u64 bytes = 0, packets = 0, hw_csum_rx_error = 0; +#ifndef IXGBE_NO_LRO + u32 flushed = 0, coal = 0; + int num_q_vectors = 1; +#endif +#ifdef IXGBE_FCOE + struct ixgbe_fcoe *fcoe = &adapter->fcoe; + unsigned int cpu; + u64 fcoe_noddp_counts_sum = 0, fcoe_noddp_ext_buff_counts_sum = 0; +#endif /* IXGBE_FCOE */ + + printk(KERN_DEBUG "ixgbe_update_stats, tx_queues=%d, rx_queues=%d\n", + adapter->num_tx_queues, adapter->num_rx_queues); + + if (test_bit(__IXGBE_DOWN, &adapter->state) || + test_bit(__IXGBE_RESETTING, &adapter->state)) + return; + +#ifndef IXGBE_NO_LRO + if (adapter->flags & IXGBE_FLAG_MSIX_ENABLED) + num_q_vectors = adapter->num_msix_vectors - NON_Q_VECTORS; + +#endif + if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) { + u64 rsc_count = 0; + u64 rsc_flush = 0; + for (i = 0; i < adapter->num_rx_queues; i++) { + rsc_count += adapter->rx_ring[i]->rx_stats.rsc_count; + rsc_flush += adapter->rx_ring[i]->rx_stats.rsc_flush; + } + adapter->rsc_total_count = rsc_count; + adapter->rsc_total_flush = rsc_flush; + } + +#ifndef IXGBE_NO_LRO + for (i = 0; i < num_q_vectors; i++) { + struct ixgbe_q_vector *q_vector = adapter->q_vector[i]; + if (!q_vector) + continue; + flushed += q_vector->lrolist.stats.flushed; + coal += q_vector->lrolist.stats.coal; + } + adapter->lro_stats.flushed = flushed; + adapter->lro_stats.coal = coal; + +#endif + for (i = 0; i < adapter->num_rx_queues; i++) { + struct ixgbe_ring *rx_ring = adapter->rx_ring[i]; + non_eop_descs += rx_ring->rx_stats.non_eop_descs; + alloc_rx_page_failed += rx_ring->rx_stats.alloc_rx_page_failed; + alloc_rx_buff_failed += rx_ring->rx_stats.alloc_rx_buff_failed; + hw_csum_rx_error += rx_ring->rx_stats.csum_err; + bytes += rx_ring->stats.bytes; + packets += rx_ring->stats.packets; + + } + adapter->non_eop_descs = non_eop_descs; + adapter->alloc_rx_page_failed = alloc_rx_page_failed; + adapter->alloc_rx_buff_failed = alloc_rx_buff_failed; + adapter->hw_csum_rx_error = hw_csum_rx_error; + net_stats->rx_bytes = bytes; + net_stats->rx_packets = packets; + + bytes = 0; + packets = 0; + /* gather some stats to the adapter struct that are per queue */ + for (i = 0; i < adapter->num_tx_queues; i++) { + struct ixgbe_ring *tx_ring = adapter->tx_ring[i]; + restart_queue += tx_ring->tx_stats.restart_queue; + tx_busy += tx_ring->tx_stats.tx_busy; + bytes += tx_ring->stats.bytes; + packets += tx_ring->stats.packets; + } + adapter->restart_queue = restart_queue; + adapter->tx_busy = tx_busy; + net_stats->tx_bytes = bytes; + net_stats->tx_packets = packets; + + hwstats->crcerrs += IXGBE_READ_REG(hw, IXGBE_CRCERRS); + + /* 8 register reads */ + for (i = 0; i < 8; i++) { + /* for packet buffers not used, the register should read 0 */ + mpc = IXGBE_READ_REG(hw, IXGBE_MPC(i)); + missed_rx += mpc; + hwstats->mpc[i] += mpc; + total_mpc += hwstats->mpc[i]; + hwstats->pxontxc[i] += IXGBE_READ_REG(hw, IXGBE_PXONTXC(i)); + hwstats->pxofftxc[i] += IXGBE_READ_REG(hw, IXGBE_PXOFFTXC(i)); + switch (hw->mac.type) { + case ixgbe_mac_82598EB: + hwstats->rnbc[i] += IXGBE_READ_REG(hw, IXGBE_RNBC(i)); + hwstats->qbtc[i] += IXGBE_READ_REG(hw, IXGBE_QBTC(i)); + hwstats->qbrc[i] += IXGBE_READ_REG(hw, IXGBE_QBRC(i)); + hwstats->pxonrxc[i] += + IXGBE_READ_REG(hw, IXGBE_PXONRXC(i)); + break; + case ixgbe_mac_82599EB: + case ixgbe_mac_X540: + hwstats->pxonrxc[i] += + IXGBE_READ_REG(hw, IXGBE_PXONRXCNT(i)); + break; + default: + break; + } + } + + /*16 register reads */ + for (i = 0; i < 16; i++) { + hwstats->qptc[i] += IXGBE_READ_REG(hw, IXGBE_QPTC(i)); + hwstats->qprc[i] += IXGBE_READ_REG(hw, IXGBE_QPRC(i)); + if ((hw->mac.type == ixgbe_mac_82599EB) || + (hw->mac.type == ixgbe_mac_X540)) { + hwstats->qbtc[i] += IXGBE_READ_REG(hw, IXGBE_QBTC_L(i)); + IXGBE_READ_REG(hw, IXGBE_QBTC_H(i)); /* to clear */ + hwstats->qbrc[i] += IXGBE_READ_REG(hw, IXGBE_QBRC_L(i)); + IXGBE_READ_REG(hw, IXGBE_QBRC_H(i)); /* to clear */ + } + } + + hwstats->gprc += IXGBE_READ_REG(hw, IXGBE_GPRC); + /* work around hardware counting issue */ + hwstats->gprc -= missed_rx; + + ixgbe_update_xoff_received(adapter); + + /* 82598 hardware only has a 32 bit counter in the high register */ + switch (hw->mac.type) { + case ixgbe_mac_82598EB: + hwstats->lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXC); + hwstats->gorc += IXGBE_READ_REG(hw, IXGBE_GORCH); + hwstats->gotc += IXGBE_READ_REG(hw, IXGBE_GOTCH); + hwstats->tor += IXGBE_READ_REG(hw, IXGBE_TORH); + break; + case ixgbe_mac_X540: + /* OS2BMC stats are X540 only*/ + hwstats->o2bgptc += IXGBE_READ_REG(hw, IXGBE_O2BGPTC); + hwstats->o2bspc += IXGBE_READ_REG(hw, IXGBE_O2BSPC); + hwstats->b2ospc += IXGBE_READ_REG(hw, IXGBE_B2OSPC); + hwstats->b2ogprc += IXGBE_READ_REG(hw, IXGBE_B2OGPRC); + case ixgbe_mac_82599EB: + for (i = 0; i < 16; i++) + adapter->hw_rx_no_dma_resources += + IXGBE_READ_REG(hw, IXGBE_QPRDC(i)); + hwstats->gorc += IXGBE_READ_REG(hw, IXGBE_GORCL); + IXGBE_READ_REG(hw, IXGBE_GORCH); /* to clear */ + hwstats->gotc += IXGBE_READ_REG(hw, IXGBE_GOTCL); + IXGBE_READ_REG(hw, IXGBE_GOTCH); /* to clear */ + hwstats->tor += IXGBE_READ_REG(hw, IXGBE_TORL); + IXGBE_READ_REG(hw, IXGBE_TORH); /* to clear */ + hwstats->lxonrxc += IXGBE_READ_REG(hw, IXGBE_LXONRXCNT); +#ifdef HAVE_TX_MQ + hwstats->fdirmatch += IXGBE_READ_REG(hw, IXGBE_FDIRMATCH); + hwstats->fdirmiss += IXGBE_READ_REG(hw, IXGBE_FDIRMISS); +#endif /* HAVE_TX_MQ */ +#ifdef IXGBE_FCOE + hwstats->fccrc += IXGBE_READ_REG(hw, IXGBE_FCCRC); + hwstats->fclast += IXGBE_READ_REG(hw, IXGBE_FCLAST); + hwstats->fcoerpdc += IXGBE_READ_REG(hw, IXGBE_FCOERPDC); + hwstats->fcoeprc += IXGBE_READ_REG(hw, IXGBE_FCOEPRC); + hwstats->fcoeptc += IXGBE_READ_REG(hw, IXGBE_FCOEPTC); + hwstats->fcoedwrc += IXGBE_READ_REG(hw, IXGBE_FCOEDWRC); + hwstats->fcoedwtc += IXGBE_READ_REG(hw, IXGBE_FCOEDWTC); + /* Add up per cpu counters for total ddp aloc fail */ + if (fcoe && fcoe->pcpu_noddp && fcoe->pcpu_noddp_ext_buff) { + for_each_possible_cpu(cpu) { + fcoe_noddp_counts_sum += + *per_cpu_ptr(fcoe->pcpu_noddp, cpu); + fcoe_noddp_ext_buff_counts_sum += + *per_cpu_ptr(fcoe-> + pcpu_noddp_ext_buff, cpu); + } + } + hwstats->fcoe_noddp = fcoe_noddp_counts_sum; + hwstats->fcoe_noddp_ext_buff = fcoe_noddp_ext_buff_counts_sum; + +#endif /* IXGBE_FCOE */ + break; + default: + break; + } + bprc = IXGBE_READ_REG(hw, IXGBE_BPRC); + hwstats->bprc += bprc; + hwstats->mprc += IXGBE_READ_REG(hw, IXGBE_MPRC); + if (hw->mac.type == ixgbe_mac_82598EB) + hwstats->mprc -= bprc; + hwstats->roc += IXGBE_READ_REG(hw, IXGBE_ROC); + hwstats->prc64 += IXGBE_READ_REG(hw, IXGBE_PRC64); + hwstats->prc127 += IXGBE_READ_REG(hw, IXGBE_PRC127); + hwstats->prc255 += IXGBE_READ_REG(hw, IXGBE_PRC255); + hwstats->prc511 += IXGBE_READ_REG(hw, IXGBE_PRC511); + hwstats->prc1023 += IXGBE_READ_REG(hw, IXGBE_PRC1023); + hwstats->prc1522 += IXGBE_READ_REG(hw, IXGBE_PRC1522); + hwstats->rlec += IXGBE_READ_REG(hw, IXGBE_RLEC); + lxon = IXGBE_READ_REG(hw, IXGBE_LXONTXC); + hwstats->lxontxc += lxon; + lxoff = IXGBE_READ_REG(hw, IXGBE_LXOFFTXC); + hwstats->lxofftxc += lxoff; + hwstats->gptc += IXGBE_READ_REG(hw, IXGBE_GPTC); + hwstats->mptc += IXGBE_READ_REG(hw, IXGBE_MPTC); + /* + * 82598 errata - tx of flow control packets is included in tx counters + */ + xon_off_tot = lxon + lxoff; + hwstats->gptc -= xon_off_tot; + hwstats->mptc -= xon_off_tot; + hwstats->gotc -= (xon_off_tot * (ETH_ZLEN + ETH_FCS_LEN)); + hwstats->ruc += IXGBE_READ_REG(hw, IXGBE_RUC); + hwstats->rfc += IXGBE_READ_REG(hw, IXGBE_RFC); + hwstats->rjc += IXGBE_READ_REG(hw, IXGBE_RJC); + hwstats->tpr += IXGBE_READ_REG(hw, IXGBE_TPR); + hwstats->ptc64 += IXGBE_READ_REG(hw, IXGBE_PTC64); + hwstats->ptc64 -= xon_off_tot; + hwstats->ptc127 += IXGBE_READ_REG(hw, IXGBE_PTC127); + hwstats->ptc255 += IXGBE_READ_REG(hw, IXGBE_PTC255); + hwstats->ptc511 += IXGBE_READ_REG(hw, IXGBE_PTC511); + hwstats->ptc1023 += IXGBE_READ_REG(hw, IXGBE_PTC1023); + hwstats->ptc1522 += IXGBE_READ_REG(hw, IXGBE_PTC1522); + hwstats->bptc += IXGBE_READ_REG(hw, IXGBE_BPTC); + /* Fill out the OS statistics structure */ + net_stats->multicast = hwstats->mprc; + + /* Rx Errors */ + net_stats->rx_errors = hwstats->crcerrs + + hwstats->rlec; + net_stats->rx_dropped = 0; + net_stats->rx_length_errors = hwstats->rlec; + net_stats->rx_crc_errors = hwstats->crcerrs; + net_stats->rx_missed_errors = total_mpc; + + /* + * VF Stats Collection - skip while resetting because these + * are not clear on read and otherwise you'll sometimes get + * crazy values. + */ + if (!test_bit(__IXGBE_RESETTING, &adapter->state)) { + for (i = 0; i < adapter->num_vfs; i++) { + UPDATE_VF_COUNTER_32bit(IXGBE_PVFGPRC(i), \ + adapter->vfinfo[i].last_vfstats.gprc, \ + adapter->vfinfo[i].vfstats.gprc); + UPDATE_VF_COUNTER_32bit(IXGBE_PVFGPTC(i), \ + adapter->vfinfo[i].last_vfstats.gptc, \ + adapter->vfinfo[i].vfstats.gptc); + UPDATE_VF_COUNTER_36bit(IXGBE_PVFGORC_LSB(i), \ + IXGBE_PVFGORC_MSB(i), \ + adapter->vfinfo[i].last_vfstats.gorc, \ + adapter->vfinfo[i].vfstats.gorc); + UPDATE_VF_COUNTER_36bit(IXGBE_PVFGOTC_LSB(i), \ + IXGBE_PVFGOTC_MSB(i), \ + adapter->vfinfo[i].last_vfstats.gotc, \ + adapter->vfinfo[i].vfstats.gotc); + UPDATE_VF_COUNTER_32bit(IXGBE_PVFMPRC(i), \ + adapter->vfinfo[i].last_vfstats.mprc, \ + adapter->vfinfo[i].vfstats.mprc); + } + } +} + + +#ifdef NO_VNIC + +/** + * ixgbe_watchdog_update_link - update the link status + * @adapter - pointer to the device adapter structure + * @link_speed - pointer to a u32 to store the link_speed + **/ +static void ixgbe_watchdog_update_link(struct ixgbe_adapter *adapter) +{ + struct ixgbe_hw *hw = &adapter->hw; + u32 link_speed = adapter->link_speed; + bool link_up = adapter->link_up; + bool pfc_en = adapter->dcb_cfg.pfc_mode_enable; + + if (!(adapter->flags & IXGBE_FLAG_NEED_LINK_UPDATE)) + return; + + if (hw->mac.ops.check_link) { + hw->mac.ops.check_link(hw, &link_speed, &link_up, false); + } else { + /* always assume link is up, if no check link function */ + link_speed = IXGBE_LINK_SPEED_10GB_FULL; + link_up = true; + } + +#ifdef HAVE_DCBNL_IEEE + if (adapter->ixgbe_ieee_pfc) + pfc_en |= !!(adapter->ixgbe_ieee_pfc->pfc_en); + +#endif + if (link_up && !((adapter->flags & IXGBE_FLAG_DCB_ENABLED) && pfc_en)) { + hw->mac.ops.fc_enable(hw); + //ixgbe_set_rx_drop_en(adapter); + } + + if (link_up || + time_after(jiffies, (adapter->link_check_timeout + + IXGBE_TRY_LINK_TIMEOUT))) { + adapter->flags &= ~IXGBE_FLAG_NEED_LINK_UPDATE; + IXGBE_WRITE_REG(hw, IXGBE_EIMS, IXGBE_EIMC_LSC); + IXGBE_WRITE_FLUSH(hw); + } + + adapter->link_up = link_up; + adapter->link_speed = link_speed; +} +#endif + + + +#ifdef NO_VNIC + +/** + * ixgbe_service_task - manages and runs subtasks + * @work: pointer to work_struct containing our data + **/ +static void ixgbe_service_task(struct work_struct *work) +{ + //struct ixgbe_adapter *adapter = container_of(work, + // struct ixgbe_adapter, + // service_task); + + //ixgbe_reset_subtask(adapter); + //ixgbe_sfp_detection_subtask(adapter); + //ixgbe_sfp_link_config_subtask(adapter); + //ixgbe_check_overtemp_subtask(adapter); + //ixgbe_watchdog_subtask(adapter); +#ifdef HAVE_TX_MQ + //ixgbe_fdir_reinit_subtask(adapter); +#endif + //ixgbe_check_hang_subtask(adapter); + + //ixgbe_service_event_complete(adapter); +} + + + + +#define IXGBE_TXD_CMD (IXGBE_TXD_CMD_EOP | \ + IXGBE_TXD_CMD_RS) + + +/** + * ixgbe_set_mac - Change the Ethernet Address of the NIC + * @netdev: network interface device structure + * @p: pointer to an address structure + * + * Returns 0 on success, negative on failure + **/ +static int ixgbe_set_mac(struct net_device *netdev, void *p) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + struct ixgbe_hw *hw = &adapter->hw; + struct sockaddr *addr = p; + int ret; + + if (!is_valid_ether_addr(addr->sa_data)) + return -EADDRNOTAVAIL; + + ixgbe_del_mac_filter(adapter, hw->mac.addr, + adapter->num_vfs); + memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); + memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len); + + + /* set the correct pool for the new PF MAC address in entry 0 */ + ret = ixgbe_add_mac_filter(adapter, hw->mac.addr, + adapter->num_vfs); + return ret > 0 ? 0 : ret; +} + + +/** + * ixgbe_ioctl - + * @netdev: + * @ifreq: + * @cmd: + **/ +static int ixgbe_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) +{ + switch (cmd) { +#ifdef ETHTOOL_OPS_COMPAT + case SIOCETHTOOL: + return ethtool_ioctl(ifr); +#endif + default: + return -EOPNOTSUPP; + } +} +#endif /* NO_VNIC */ + + +void ixgbe_do_reset(struct net_device *netdev) +{ + struct ixgbe_adapter *adapter = netdev_priv(netdev); + + if (netif_running(netdev)) + ixgbe_reinit_locked(adapter); + else + ixgbe_reset(adapter); +} + + + + + + +/** + * ixgbe_probe - Device Initialization Routine + * @pdev: PCI device information struct + * @ent: entry in ixgbe_pci_tbl + * + * Returns 0 on success, negative on failure + * + * ixgbe_probe initializes an adapter identified by a pci_dev structure. + * The OS initialization, configuring of the adapter private structure, + * and a hardware reset occur. + **/ +//static +int ixgbe_kni_probe(struct pci_dev *pdev, + struct net_device **lad_dev) +{ + size_t count; + struct net_device *netdev; + struct ixgbe_adapter *adapter = NULL; + struct ixgbe_hw *hw = NULL; + static int cards_found; + int i, err; + u16 offset; + u16 eeprom_verh, eeprom_verl, eeprom_cfg_blkh, eeprom_cfg_blkl; + u32 etrack_id; + u16 build, major, patch; + char *info_string, *i_s_var; + u8 part_str[IXGBE_PBANUM_LENGTH]; + enum ixgbe_mac_type mac_type = ixgbe_mac_unknown; +#ifdef HAVE_TX_MQ + unsigned int indices = num_possible_cpus(); +#endif /* HAVE_TX_MQ */ +#ifdef IXGBE_FCOE + u16 device_caps; +#endif + u16 wol_cap; + + err = pci_enable_device_mem(pdev); + if (err) + return err; + + +#ifdef NO_VNIC + err = pci_request_selected_regions(pdev, pci_select_bars(pdev, + IORESOURCE_MEM), ixgbe_driver_name); + if (err) { + dev_err(pci_dev_to_dev(pdev), + "pci_request_selected_regions failed 0x%x\n", err); + goto err_pci_reg; + } +#endif + + /* + * The mac_type is needed before we have the adapter is set up + * so rather than maintain two devID -> MAC tables we dummy up + * an ixgbe_hw stuct and use ixgbe_set_mac_type. + */ + hw = vmalloc(sizeof(struct ixgbe_hw)); + if (!hw) { + pr_info("Unable to allocate memory for early mac " + "check\n"); + } else { + hw->vendor_id = pdev->vendor; + hw->device_id = pdev->device; + ixgbe_set_mac_type(hw); + mac_type = hw->mac.type; + vfree(hw); + } + +#ifdef NO_VNIC + /* + * Workaround of Silicon errata on 82598. Disable LOs in the PCI switch + * port to which the 82598 is connected to prevent duplicate + * completions caused by LOs. We need the mac type so that we only + * do this on 82598 devices, ixgbe_set_mac_type does this for us if + * we set it's device ID. + */ + if (mac_type == ixgbe_mac_82598EB) + pci_disable_link_state(pdev, PCIE_LINK_STATE_L0S); + + pci_enable_pcie_error_reporting(pdev); + + pci_set_master(pdev); +#endif + +#ifdef HAVE_TX_MQ +#ifdef CONFIG_DCB +#ifdef HAVE_MQPRIO + indices *= IXGBE_DCB_MAX_TRAFFIC_CLASS; +#else + indices = max_t(unsigned int, indices, IXGBE_MAX_DCB_INDICES); +#endif /* HAVE_MQPRIO */ +#endif /* CONFIG_DCB */ + + if (mac_type == ixgbe_mac_82598EB) + indices = min_t(unsigned int, indices, IXGBE_MAX_RSS_INDICES); + else + indices = min_t(unsigned int, indices, IXGBE_MAX_FDIR_INDICES); + +#ifdef IXGBE_FCOE + indices += min_t(unsigned int, num_possible_cpus(), + IXGBE_MAX_FCOE_INDICES); +#endif + netdev = alloc_etherdev_mq(sizeof(struct ixgbe_adapter), indices); +#else /* HAVE_TX_MQ */ + netdev = alloc_etherdev(sizeof(struct ixgbe_adapter)); +#endif /* HAVE_TX_MQ */ + if (!netdev) { + err = -ENOMEM; + goto err_alloc_etherdev; + } + + SET_NETDEV_DEV(netdev, &pdev->dev); + + adapter = netdev_priv(netdev); + //pci_set_drvdata(pdev, adapter); + + adapter->netdev = netdev; + adapter->pdev = pdev; + hw = &adapter->hw; + hw->back = adapter; + adapter->msg_enable = (1 << DEFAULT_DEBUG_LEVEL_SHIFT) - 1; + +#ifdef HAVE_PCI_ERS + /* + * call save state here in standalone driver because it relies on + * adapter struct to exist, and needs to call netdev_priv + */ + pci_save_state(pdev); + +#endif + hw->hw_addr = ioremap(pci_resource_start(pdev, 0), + pci_resource_len(pdev, 0)); + if (!hw->hw_addr) { + err = -EIO; + goto err_ioremap; + } + //ixgbe_assign_netdev_ops(netdev); + ixgbe_set_ethtool_ops(netdev); + + strlcpy(netdev->name, pci_name(pdev), sizeof(netdev->name)); + + adapter->bd_number = cards_found; + + /* setup the private structure */ + err = ixgbe_sw_init(adapter); + if (err) + goto err_sw_init; + + /* Make it possible the adapter to be woken up via WOL */ + switch (adapter->hw.mac.type) { + case ixgbe_mac_82599EB: + case ixgbe_mac_X540: + IXGBE_WRITE_REG(&adapter->hw, IXGBE_WUS, ~0); + break; + default: + break; + } + + /* + * check_options must be called before setup_link to set up + * hw->fc completely + */ + //ixgbe_check_options(adapter); + +#ifndef NO_VNIC + /* reset_hw fills in the perm_addr as well */ + hw->phy.reset_if_overtemp = true; + err = hw->mac.ops.reset_hw(hw); + hw->phy.reset_if_overtemp = false; + if (err == IXGBE_ERR_SFP_NOT_PRESENT && + hw->mac.type == ixgbe_mac_82598EB) { + err = 0; + } else if (err == IXGBE_ERR_SFP_NOT_SUPPORTED) { + e_dev_err("failed to load because an unsupported SFP+ " + "module type was detected.\n"); + e_dev_err("Reload the driver after installing a supported " + "module.\n"); + goto err_sw_init; + } else if (err) { + e_dev_err("HW Init failed: %d\n", err); + goto err_sw_init; + } +#endif + + //if (adapter->flags & IXGBE_FLAG_SRIOV_ENABLED) + // ixgbe_probe_vf(adapter); + + +#ifdef MAX_SKB_FRAGS + netdev->features |= NETIF_F_SG | + NETIF_F_IP_CSUM; + +#ifdef NETIF_F_IPV6_CSUM + netdev->features |= NETIF_F_IPV6_CSUM; +#endif + +#ifdef NETIF_F_HW_VLAN_TX + netdev->features |= NETIF_F_HW_VLAN_TX | + NETIF_F_HW_VLAN_RX; +#endif +#ifdef NETIF_F_TSO + netdev->features |= NETIF_F_TSO; +#endif /* NETIF_F_TSO */ +#ifdef NETIF_F_TSO6 + netdev->features |= NETIF_F_TSO6; +#endif /* NETIF_F_TSO6 */ +#ifdef NETIF_F_RXHASH + netdev->features |= NETIF_F_RXHASH; +#endif /* NETIF_F_RXHASH */ + +#ifdef HAVE_NDO_SET_FEATURES + netdev->features |= NETIF_F_RXCSUM; + + /* copy netdev features into list of user selectable features */ + netdev->hw_features |= netdev->features; + + /* give us the option of enabling RSC/LRO later */ +#ifdef IXGBE_NO_LRO + if (adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE) +#endif + netdev->hw_features |= NETIF_F_LRO; + +#else +#ifdef NETIF_F_GRO + + /* this is only needed on kernels prior to 2.6.39 */ + netdev->features |= NETIF_F_GRO; +#endif /* NETIF_F_GRO */ +#endif + +#ifdef NETIF_F_HW_VLAN_TX + /* set this bit last since it cannot be part of hw_features */ + netdev->features |= NETIF_F_HW_VLAN_FILTER; +#endif + switch (adapter->hw.mac.type) { + case ixgbe_mac_82599EB: + case ixgbe_mac_X540: + netdev->features |= NETIF_F_SCTP_CSUM; +#ifdef HAVE_NDO_SET_FEATURES + netdev->hw_features |= NETIF_F_SCTP_CSUM | + NETIF_F_NTUPLE; +#endif + break; + default: + break; + } + +#ifdef HAVE_NETDEV_VLAN_FEATURES + netdev->vlan_features |= NETIF_F_SG | + NETIF_F_IP_CSUM | + NETIF_F_IPV6_CSUM | + NETIF_F_TSO | + NETIF_F_TSO6; + +#endif /* HAVE_NETDEV_VLAN_FEATURES */ + /* + * If perfect filters were enabled in check_options(), enable them + * on the netdevice too. + */ + if (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE) + netdev->features |= NETIF_F_NTUPLE; + if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) + adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED; + if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) + adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED; + if (adapter->flags & IXGBE_FLAG_VMDQ_ENABLED) { + adapter->flags &= ~IXGBE_FLAG_FDIR_HASH_CAPABLE; + /* clear n-tuple support in the netdev unconditionally */ + netdev->features &= ~NETIF_F_NTUPLE; + } + +#ifdef NETIF_F_RXHASH + if (!(adapter->flags & IXGBE_FLAG_RSS_ENABLED)) + netdev->features &= ~NETIF_F_RXHASH; + +#endif /* NETIF_F_RXHASH */ + if (netdev->features & NETIF_F_LRO) { + if ((adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE) && + ((adapter->rx_itr_setting == 1) || + (adapter->rx_itr_setting > IXGBE_MIN_RSC_ITR))) { + adapter->flags2 |= IXGBE_FLAG2_RSC_ENABLED; + } else if (adapter->flags2 & IXGBE_FLAG2_RSC_CAPABLE) { +#ifdef IXGBE_NO_LRO + e_info(probe, "InterruptThrottleRate set too high, " + "disabling RSC\n"); +#else + e_info(probe, "InterruptThrottleRate set too high, " + "falling back to software LRO\n"); +#endif + } + } +#ifdef CONFIG_DCB + //netdev->dcbnl_ops = &dcbnl_ops; +#endif + +#ifdef IXGBE_FCOE +#ifdef NETIF_F_FSO + if (adapter->flags & IXGBE_FLAG_FCOE_CAPABLE) { + ixgbe_get_device_caps(hw, &device_caps); + if (device_caps & IXGBE_DEVICE_CAPS_FCOE_OFFLOADS) { + adapter->flags &= ~IXGBE_FLAG_FCOE_ENABLED; + adapter->flags &= ~IXGBE_FLAG_FCOE_CAPABLE; + e_info(probe, "FCoE offload feature is not available. " + "Disabling FCoE offload feature\n"); + } +#ifndef HAVE_NETDEV_OPS_FCOE_ENABLE + else { + adapter->flags |= IXGBE_FLAG_FCOE_ENABLED; + adapter->ring_feature[RING_F_FCOE].indices = + IXGBE_FCRETA_SIZE; + netdev->features |= NETIF_F_FSO | + NETIF_F_FCOE_CRC | + NETIF_F_FCOE_MTU; + netdev->fcoe_ddp_xid = IXGBE_FCOE_DDP_MAX - 1; + } +#endif /* HAVE_NETDEV_OPS_FCOE_ENABLE */ +#ifdef HAVE_NETDEV_VLAN_FEATURES + netdev->vlan_features |= NETIF_F_FSO | + NETIF_F_FCOE_CRC | + NETIF_F_FCOE_MTU; +#endif /* HAVE_NETDEV_VLAN_FEATURES */ + } +#endif /* NETIF_F_FSO */ +#endif /* IXGBE_FCOE */ + +#endif /* MAX_SKB_FRAGS */ + /* make sure the EEPROM is good */ + if (hw->eeprom.ops.validate_checksum && + (hw->eeprom.ops.validate_checksum(hw, NULL) < 0)) { + e_dev_err("The EEPROM Checksum Is Not Valid\n"); + err = -EIO; + goto err_sw_init; + } + + memcpy(netdev->dev_addr, hw->mac.perm_addr, netdev->addr_len); +#ifdef ETHTOOL_GPERMADDR + memcpy(netdev->perm_addr, hw->mac.perm_addr, netdev->addr_len); + + if (ixgbe_validate_mac_addr(netdev->perm_addr)) { + e_dev_err("invalid MAC address\n"); + err = -EIO; + goto err_sw_init; + } +#else + if (ixgbe_validate_mac_addr(netdev->dev_addr)) { + e_dev_err("invalid MAC address\n"); + err = -EIO; + goto err_sw_init; + } +#endif + memcpy(&adapter->mac_table[0].addr, hw->mac.perm_addr, + netdev->addr_len); + adapter->mac_table[0].queue = adapter->num_vfs; + adapter->mac_table[0].state = (IXGBE_MAC_STATE_DEFAULT | + IXGBE_MAC_STATE_IN_USE); + hw->mac.ops.set_rar(hw, 0, adapter->mac_table[0].addr, + adapter->mac_table[0].queue, + IXGBE_RAH_AV); + + //setup_timer(&adapter->service_timer, &ixgbe_service_timer, + // (unsigned long) adapter); + + //INIT_WORK(&adapter->service_task, ixgbe_service_task); + //clear_bit(__IXGBE_SERVICE_SCHED, &adapter->state); + + //err = ixgbe_init_interrupt_scheme(adapter); + //if (err) + // goto err_sw_init; + + //adapter->flags &= ~IXGBE_FLAG_RSS_ENABLED; + ixgbe_set_num_queues(adapter); + + adapter->wol = 0; + /* WOL not supported for all but the following */ + switch (pdev->device) { + case IXGBE_DEV_ID_82599_SFP: + /* Only these subdevice supports WOL */ + switch (pdev->subsystem_device) { + case IXGBE_SUBDEV_ID_82599_560FLR: + /* only support first port */ + if (hw->bus.func != 0) + break; + case IXGBE_SUBDEV_ID_82599_SFP: + adapter->wol = IXGBE_WUFC_MAG; + break; + } + break; + case IXGBE_DEV_ID_82599_COMBO_BACKPLANE: + /* All except this subdevice support WOL */ + if (pdev->subsystem_device != IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ) + adapter->wol = IXGBE_WUFC_MAG; + break; + case IXGBE_DEV_ID_82599_KX4: + adapter->wol = IXGBE_WUFC_MAG; + break; + case IXGBE_DEV_ID_X540T: + /* Check eeprom to see if it is enabled */ + ixgbe_read_eeprom(hw, 0x2c, &adapter->eeprom_cap); + wol_cap = adapter->eeprom_cap & IXGBE_DEVICE_CAPS_WOL_MASK; + + if ((wol_cap == IXGBE_DEVICE_CAPS_WOL_PORT0_1) || + ((wol_cap == IXGBE_DEVICE_CAPS_WOL_PORT0) && + (hw->bus.func == 0))) + adapter->wol = IXGBE_WUFC_MAG; + break; + } + //device_set_wakeup_enable(&adapter->pdev->dev, adapter->wol); + + + /* + * Save off EEPROM version number and Option Rom version which + * together make a unique identify for the eeprom + */ + ixgbe_read_eeprom(hw, 0x2e, &eeprom_verh); + ixgbe_read_eeprom(hw, 0x2d, &eeprom_verl); + + etrack_id = (eeprom_verh << 16) | eeprom_verl; + + ixgbe_read_eeprom(hw, 0x17, &offset); + + /* Make sure offset to SCSI block is valid */ + if (!(offset == 0x0) && !(offset == 0xffff)) { + ixgbe_read_eeprom(hw, offset + 0x84, &eeprom_cfg_blkh); + ixgbe_read_eeprom(hw, offset + 0x83, &eeprom_cfg_blkl); + + /* Only display Option Rom if exist */ + if (eeprom_cfg_blkl && eeprom_cfg_blkh) { + major = eeprom_cfg_blkl >> 8; + build = (eeprom_cfg_blkl << 8) | (eeprom_cfg_blkh >> 8); + patch = eeprom_cfg_blkh & 0x00ff; + + snprintf(adapter->eeprom_id, sizeof(adapter->eeprom_id), + "0x%08x, %d.%d.%d", etrack_id, major, build, + patch); + } else { + snprintf(adapter->eeprom_id, sizeof(adapter->eeprom_id), + "0x%08x", etrack_id); + } + } else { + snprintf(adapter->eeprom_id, sizeof(adapter->eeprom_id), + "0x%08x", etrack_id); + } + + /* reset the hardware with the new settings */ + err = hw->mac.ops.start_hw(hw); + if (err == IXGBE_ERR_EEPROM_VERSION) { + /* We are running on a pre-production device, log a warning */ + e_dev_warn("This device is a pre-production adapter/LOM. " + "Please be aware there may be issues associated " + "with your hardware. If you are experiencing " + "problems please contact your Intel or hardware " + "representative who provided you with this " + "hardware.\n"); + } + /* pick up the PCI bus settings for reporting later */ + if (hw->mac.ops.get_bus_info) + hw->mac.ops.get_bus_info(hw); + + strlcpy(netdev->name, "eth%d", sizeof(netdev->name)); + *lad_dev = netdev; + + adapter->netdev_registered = true; +#ifdef NO_VNIC + /* power down the optics */ + if ((hw->phy.multispeed_fiber) || + ((hw->mac.ops.get_media_type(hw) == ixgbe_media_type_fiber) && + (hw->mac.type == ixgbe_mac_82599EB))) + ixgbe_disable_tx_laser(hw); + + /* carrier off reporting is important to ethtool even BEFORE open */ + netif_carrier_off(netdev); + /* keep stopping all the transmit queues for older kernels */ + netif_tx_stop_all_queues(netdev); +#endif + + /* print all messages at the end so that we use our eth%d name */ + /* print bus type/speed/width info */ + e_dev_info("(PCI Express:%s:%s) ", + (hw->bus.speed == ixgbe_bus_speed_5000 ? "5.0GT/s" : + hw->bus.speed == ixgbe_bus_speed_2500 ? "2.5GT/s" : + "Unknown"), + (hw->bus.width == ixgbe_bus_width_pcie_x8 ? "Width x8" : + hw->bus.width == ixgbe_bus_width_pcie_x4 ? "Width x4" : + hw->bus.width == ixgbe_bus_width_pcie_x1 ? "Width x1" : + "Unknown")); + + /* print the MAC address */ + for (i = 0; i < 6; i++) + pr_cont("%2.2x%c", netdev->dev_addr[i], i == 5 ? '\n' : ':'); + + /* First try to read PBA as a string */ + err = ixgbe_read_pba_string(hw, part_str, IXGBE_PBANUM_LENGTH); + if (err) + strlcpy(part_str, "Unknown", sizeof(part_str)); + if (ixgbe_is_sfp(hw) && hw->phy.sfp_type != ixgbe_sfp_type_not_present) + e_info(probe, "MAC: %d, PHY: %d, SFP+: %d, PBA No: %s\n", + hw->mac.type, hw->phy.type, hw->phy.sfp_type, part_str); + else + e_info(probe, "MAC: %d, PHY: %d, PBA No: %s\n", + hw->mac.type, hw->phy.type, part_str); + + if (((hw->bus.speed == ixgbe_bus_speed_2500) && + (hw->bus.width <= ixgbe_bus_width_pcie_x4)) || + (hw->bus.width <= ixgbe_bus_width_pcie_x2)) { + e_dev_warn("PCI-Express bandwidth available for this " + "card is not sufficient for optimal " + "performance.\n"); + e_dev_warn("For optimal performance a x8 PCI-Express " + "slot is required.\n"); + } + +#define INFO_STRING_LEN 255 + info_string = kzalloc(INFO_STRING_LEN, GFP_KERNEL); + if (!info_string) { + e_err(probe, "allocation for info string failed\n"); + goto no_info_string; + } + count = 0; + i_s_var = info_string; + count += snprintf(i_s_var, INFO_STRING_LEN, "Enabled Features: "); + + i_s_var = info_string + count; + count += snprintf(i_s_var, (INFO_STRING_LEN - count), + "RxQ: %d TxQ: %d ", adapter->num_rx_queues, + adapter->num_tx_queues); + i_s_var = info_string + count; +#ifdef IXGBE_FCOE + if (adapter->flags & IXGBE_FLAG_FCOE_ENABLED) { + count += snprintf(i_s_var, INFO_STRING_LEN - count, "FCoE "); + i_s_var = info_string + count; + } +#endif + if (adapter->flags & IXGBE_FLAG_FDIR_HASH_CAPABLE) { + count += snprintf(i_s_var, INFO_STRING_LEN - count, + "FdirHash "); + i_s_var = info_string + count; + } + if (adapter->flags & IXGBE_FLAG_FDIR_PERFECT_CAPABLE) { + count += snprintf(i_s_var, INFO_STRING_LEN - count, + "FdirPerfect "); + i_s_var = info_string + count; + } + if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) { + count += snprintf(i_s_var, INFO_STRING_LEN - count, "DCB "); + i_s_var = info_string + count; + } + if (adapter->flags & IXGBE_FLAG_RSS_ENABLED) { + count += snprintf(i_s_var, INFO_STRING_LEN - count, "RSS "); + i_s_var = info_string + count; + } + if (adapter->flags & IXGBE_FLAG_DCA_ENABLED) { + count += snprintf(i_s_var, INFO_STRING_LEN - count, "DCA "); + i_s_var = info_string + count; + } + if (adapter->flags2 & IXGBE_FLAG2_RSC_ENABLED) { + count += snprintf(i_s_var, INFO_STRING_LEN - count, "RSC "); + i_s_var = info_string + count; + } +#ifndef IXGBE_NO_LRO + else if (netdev->features & NETIF_F_LRO) { + count += snprintf(i_s_var, INFO_STRING_LEN - count, "LRO "); + i_s_var = info_string + count; + } +#endif + + BUG_ON(i_s_var > (info_string + INFO_STRING_LEN)); + /* end features printing */ + e_info(probe, "%s\n", info_string); + kfree(info_string); +no_info_string: + + /* firmware requires blank driver version */ + ixgbe_set_fw_drv_ver(hw, 0xFF, 0xFF, 0xFF, 0xFF); + +#if defined(HAVE_NETDEV_STORAGE_ADDRESS) && defined(NETDEV_HW_ADDR_T_SAN) + /* add san mac addr to netdev */ + //ixgbe_add_sanmac_netdev(netdev); + +#endif /* (HAVE_NETDEV_STORAGE_ADDRESS) && (NETDEV_HW_ADDR_T_SAN) */ + e_info(probe, "Intel(R) 10 Gigabit Network Connection\n"); + cards_found++; + +#ifdef IXGBE_SYSFS + //if (ixgbe_sysfs_init(adapter)) + // e_err(probe, "failed to allocate sysfs resources\n"); +#else +#ifdef IXGBE_PROCFS + //if (ixgbe_procfs_init(adapter)) + // e_err(probe, "failed to allocate procfs resources\n"); +#endif /* IXGBE_PROCFS */ +#endif /* IXGBE_SYSFS */ + + return 0; + +//err_register: + //ixgbe_clear_interrupt_scheme(adapter); + //ixgbe_release_hw_control(adapter); +err_sw_init: + adapter->flags2 &= ~IXGBE_FLAG2_SEARCH_FOR_SFP; + if (adapter->mac_table) + kfree(adapter->mac_table); + iounmap(hw->hw_addr); +err_ioremap: + free_netdev(netdev); +err_alloc_etherdev: + //pci_release_selected_regions(pdev, + // pci_select_bars(pdev, IORESOURCE_MEM)); +//err_pci_reg: +//err_dma: + pci_disable_device(pdev); + return err; +} + +/** + * ixgbe_remove - Device Removal Routine + * @pdev: PCI device information struct + * + * ixgbe_remove is called by the PCI subsystem to alert the driver + * that it should release a PCI device. The could be caused by a + * Hot-Plug event, or because the driver is going to be removed from + * memory. + **/ +void ixgbe_kni_remove(struct pci_dev *pdev) +{ + pci_disable_device(pdev); +} + + +u16 ixgbe_read_pci_cfg_word(struct ixgbe_hw *hw, u32 reg) +{ + u16 value; + struct ixgbe_adapter *adapter = hw->back; + + pci_read_config_word(adapter->pdev, reg, &value); + return value; +} + +void ixgbe_write_pci_cfg_word(struct ixgbe_hw *hw, u32 reg, u16 value) +{ + struct ixgbe_adapter *adapter = hw->back; + + pci_write_config_word(adapter->pdev, reg, value); +} + +void ewarn(struct ixgbe_hw *hw, const char *st, u32 status) +{ + struct ixgbe_adapter *adapter = hw->back; + + netif_warn(adapter, drv, adapter->netdev, "%s", st); +} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h new file mode 100644 index 00000000..124f00de --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_mbx.h @@ -0,0 +1,105 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _IXGBE_MBX_H_ +#define _IXGBE_MBX_H_ + +#include "ixgbe_type.h" + +#define IXGBE_VFMAILBOX_SIZE 16 /* 16 32 bit words - 64 bytes */ +#define IXGBE_ERR_MBX -100 + +#define IXGBE_VFMAILBOX 0x002FC +#define IXGBE_VFMBMEM 0x00200 + +/* Define mailbox register bits */ +#define IXGBE_VFMAILBOX_REQ 0x00000001 /* Request for PF Ready bit */ +#define IXGBE_VFMAILBOX_ACK 0x00000002 /* Ack PF message received */ +#define IXGBE_VFMAILBOX_VFU 0x00000004 /* VF owns the mailbox buffer */ +#define IXGBE_VFMAILBOX_PFU 0x00000008 /* PF owns the mailbox buffer */ +#define IXGBE_VFMAILBOX_PFSTS 0x00000010 /* PF wrote a message in the MB */ +#define IXGBE_VFMAILBOX_PFACK 0x00000020 /* PF ack the previous VF msg */ +#define IXGBE_VFMAILBOX_RSTI 0x00000040 /* PF has reset indication */ +#define IXGBE_VFMAILBOX_RSTD 0x00000080 /* PF has indicated reset done */ +#define IXGBE_VFMAILBOX_R2C_BITS 0x000000B0 /* All read to clear bits */ + +#define IXGBE_PFMAILBOX_STS 0x00000001 /* Initiate message send to VF */ +#define IXGBE_PFMAILBOX_ACK 0x00000002 /* Ack message recv'd from VF */ +#define IXGBE_PFMAILBOX_VFU 0x00000004 /* VF owns the mailbox buffer */ +#define IXGBE_PFMAILBOX_PFU 0x00000008 /* PF owns the mailbox buffer */ +#define IXGBE_PFMAILBOX_RVFU 0x00000010 /* Reset VFU - used when VF stuck */ + +#define IXGBE_MBVFICR_VFREQ_MASK 0x0000FFFF /* bits for VF messages */ +#define IXGBE_MBVFICR_VFREQ_VF1 0x00000001 /* bit for VF 1 message */ +#define IXGBE_MBVFICR_VFACK_MASK 0xFFFF0000 /* bits for VF acks */ +#define IXGBE_MBVFICR_VFACK_VF1 0x00010000 /* bit for VF 1 ack */ + + +/* If it's a IXGBE_VF_* msg then it originates in the VF and is sent to the + * PF. The reverse is true if it is IXGBE_PF_*. + * Message ACK's are the value or'd with 0xF0000000 + */ +#define IXGBE_VT_MSGTYPE_ACK 0x80000000 /* Messages below or'd with + * this are the ACK */ +#define IXGBE_VT_MSGTYPE_NACK 0x40000000 /* Messages below or'd with + * this are the NACK */ +#define IXGBE_VT_MSGTYPE_CTS 0x20000000 /* Indicates that VF is still + * clear to send requests */ +#define IXGBE_VT_MSGINFO_SHIFT 16 +/* bits 23:16 are used for extra info for certain messages */ +#define IXGBE_VT_MSGINFO_MASK (0xFF << IXGBE_VT_MSGINFO_SHIFT) + +#define IXGBE_VF_RESET 0x01 /* VF requests reset */ +#define IXGBE_VF_SET_MAC_ADDR 0x02 /* VF requests PF to set MAC addr */ +#define IXGBE_VF_SET_MULTICAST 0x03 /* VF requests PF to set MC addr */ +#define IXGBE_VF_SET_VLAN 0x04 /* VF requests PF to set VLAN */ +#define IXGBE_VF_SET_LPE 0x05 /* VF requests PF to set VMOLR.LPE */ +#define IXGBE_VF_SET_MACVLAN 0x06 /* VF requests PF for unicast filter */ + +/* length of permanent address message returned from PF */ +#define IXGBE_VF_PERMADDR_MSG_LEN 4 +/* word in permanent address message with the current multicast type */ +#define IXGBE_VF_MC_TYPE_WORD 3 + +#define IXGBE_PF_CONTROL_MSG 0x0100 /* PF control message */ + + +#define IXGBE_VF_MBX_INIT_TIMEOUT 2000 /* number of retries on mailbox */ +#define IXGBE_VF_MBX_INIT_DELAY 500 /* microseconds between retries */ + +s32 ixgbe_read_mbx(struct ixgbe_hw *, u32 *, u16, u16); +s32 ixgbe_write_mbx(struct ixgbe_hw *, u32 *, u16, u16); +s32 ixgbe_read_posted_mbx(struct ixgbe_hw *, u32 *, u16, u16); +s32 ixgbe_write_posted_mbx(struct ixgbe_hw *, u32 *, u16, u16); +s32 ixgbe_check_for_msg(struct ixgbe_hw *, u16); +s32 ixgbe_check_for_ack(struct ixgbe_hw *, u16); +s32 ixgbe_check_for_rst(struct ixgbe_hw *, u16); +void ixgbe_init_mbx_ops_generic(struct ixgbe_hw *hw); +void ixgbe_init_mbx_params_vf(struct ixgbe_hw *); +void ixgbe_init_mbx_params_pf(struct ixgbe_hw *); + +#endif /* _IXGBE_MBX_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h new file mode 100644 index 00000000..d161600b --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_osdep.h @@ -0,0 +1,132 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + + +/* glue for the OS independent part of ixgbe + * includes register access macros + */ + +#ifndef _IXGBE_OSDEP_H_ +#define _IXGBE_OSDEP_H_ + +#include +#include +#include +#include +#include +#include "kcompat.h" + + +#ifndef msleep +#define msleep(x) do { if (in_interrupt()) { \ + /* Don't mdelay in interrupt context! */ \ + BUG(); \ + } else { \ + msleep(x); \ + } } while (0) + +#endif + +#undef ASSERT + +#ifdef DBG +#define hw_dbg(hw, S, A...) printk(KERN_DEBUG S, ## A) +#else +#define hw_dbg(hw, S, A...) do {} while (0) +#endif + +#define e_dev_info(format, arg...) \ + dev_info(pci_dev_to_dev(adapter->pdev), format, ## arg) +#define e_dev_warn(format, arg...) \ + dev_warn(pci_dev_to_dev(adapter->pdev), format, ## arg) +#define e_dev_err(format, arg...) \ + dev_err(pci_dev_to_dev(adapter->pdev), format, ## arg) +#define e_dev_notice(format, arg...) \ + dev_notice(pci_dev_to_dev(adapter->pdev), format, ## arg) +#define e_info(msglvl, format, arg...) \ + netif_info(adapter, msglvl, adapter->netdev, format, ## arg) +#define e_err(msglvl, format, arg...) \ + netif_err(adapter, msglvl, adapter->netdev, format, ## arg) +#define e_warn(msglvl, format, arg...) \ + netif_warn(adapter, msglvl, adapter->netdev, format, ## arg) +#define e_crit(msglvl, format, arg...) \ + netif_crit(adapter, msglvl, adapter->netdev, format, ## arg) + + +#ifdef DBG +#define IXGBE_WRITE_REG(a, reg, value) do {\ + switch (reg) { \ + case IXGBE_EIMS: \ + case IXGBE_EIMC: \ + case IXGBE_EIAM: \ + case IXGBE_EIAC: \ + case IXGBE_EICR: \ + case IXGBE_EICS: \ + printk("%s: Reg - 0x%05X, value - 0x%08X\n", __func__, \ + reg, (u32)(value)); \ + default: \ + break; \ + } \ + writel((value), ((a)->hw_addr + (reg))); \ +} while (0) +#else +#define IXGBE_WRITE_REG(a, reg, value) writel((value), ((a)->hw_addr + (reg))) +#endif + +#define IXGBE_READ_REG(a, reg) readl((a)->hw_addr + (reg)) + +#define IXGBE_WRITE_REG_ARRAY(a, reg, offset, value) ( \ + writel((value), ((a)->hw_addr + (reg) + ((offset) << 2)))) + +#define IXGBE_READ_REG_ARRAY(a, reg, offset) ( \ + readl((a)->hw_addr + (reg) + ((offset) << 2))) + +#ifndef writeq +#define writeq(val, addr) do { writel((u32) (val), addr); \ + writel((u32) (val >> 32), (addr + 4)); \ + } while (0); +#endif + +#define IXGBE_WRITE_REG64(a, reg, value) writeq((value), ((a)->hw_addr + (reg))) + +#define IXGBE_WRITE_FLUSH(a) IXGBE_READ_REG(a, IXGBE_STATUS) +struct ixgbe_hw; +extern u16 ixgbe_read_pci_cfg_word(struct ixgbe_hw *hw, u32 reg); +extern void ixgbe_write_pci_cfg_word(struct ixgbe_hw *hw, u32 reg, u16 value); +extern void ewarn(struct ixgbe_hw *hw, const char *str, u32 status); + +#define IXGBE_READ_PCIE_WORD ixgbe_read_pci_cfg_word +#define IXGBE_WRITE_PCIE_WORD ixgbe_write_pci_cfg_word +#define IXGBE_EEPROM_GRANT_ATTEMPS 100 +#define IXGBE_HTONL(_i) htonl(_i) +#define IXGBE_NTOHL(_i) ntohl(_i) +#define IXGBE_NTOHS(_i) ntohs(_i) +#define IXGBE_CPU_TO_LE32(_i) cpu_to_le32(_i) +#define IXGBE_LE32_TO_CPUS(_i) le32_to_cpus(_i) +#define EWARN(H, W, S) ewarn(H, W, S) + +#endif /* _IXGBE_OSDEP_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c new file mode 100644 index 00000000..e3f5275e --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.c @@ -0,0 +1,1847 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#include "ixgbe_api.h" +#include "ixgbe_common.h" +#include "ixgbe_phy.h" + +static void ixgbe_i2c_start(struct ixgbe_hw *hw); +static void ixgbe_i2c_stop(struct ixgbe_hw *hw); +static s32 ixgbe_clock_in_i2c_byte(struct ixgbe_hw *hw, u8 *data); +static s32 ixgbe_clock_out_i2c_byte(struct ixgbe_hw *hw, u8 data); +static s32 ixgbe_get_i2c_ack(struct ixgbe_hw *hw); +static s32 ixgbe_clock_in_i2c_bit(struct ixgbe_hw *hw, bool *data); +static s32 ixgbe_clock_out_i2c_bit(struct ixgbe_hw *hw, bool data); +static void ixgbe_raise_i2c_clk(struct ixgbe_hw *hw, u32 *i2cctl); +static void ixgbe_lower_i2c_clk(struct ixgbe_hw *hw, u32 *i2cctl); +static s32 ixgbe_set_i2c_data(struct ixgbe_hw *hw, u32 *i2cctl, bool data); +static bool ixgbe_get_i2c_data(u32 *i2cctl); + +/** + * ixgbe_init_phy_ops_generic - Inits PHY function ptrs + * @hw: pointer to the hardware structure + * + * Initialize the function pointers. + **/ +s32 ixgbe_init_phy_ops_generic(struct ixgbe_hw *hw) +{ + struct ixgbe_phy_info *phy = &hw->phy; + + /* PHY */ + phy->ops.identify = &ixgbe_identify_phy_generic; + phy->ops.reset = &ixgbe_reset_phy_generic; + phy->ops.read_reg = &ixgbe_read_phy_reg_generic; + phy->ops.write_reg = &ixgbe_write_phy_reg_generic; + phy->ops.setup_link = &ixgbe_setup_phy_link_generic; + phy->ops.setup_link_speed = &ixgbe_setup_phy_link_speed_generic; + phy->ops.check_link = NULL; + phy->ops.get_firmware_version = ixgbe_get_phy_firmware_version_generic; + phy->ops.read_i2c_byte = &ixgbe_read_i2c_byte_generic; + phy->ops.write_i2c_byte = &ixgbe_write_i2c_byte_generic; + phy->ops.read_i2c_eeprom = &ixgbe_read_i2c_eeprom_generic; + phy->ops.write_i2c_eeprom = &ixgbe_write_i2c_eeprom_generic; + phy->ops.i2c_bus_clear = &ixgbe_i2c_bus_clear; + phy->ops.identify_sfp = &ixgbe_identify_module_generic; + phy->sfp_type = ixgbe_sfp_type_unknown; + phy->ops.check_overtemp = &ixgbe_tn_check_overtemp; + return 0; +} + +/** + * ixgbe_identify_phy_generic - Get physical layer module + * @hw: pointer to hardware structure + * + * Determines the physical layer module found on the current adapter. + **/ +s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw) +{ + s32 status = IXGBE_ERR_PHY_ADDR_INVALID; + u32 phy_addr; + u16 ext_ability = 0; + + if (hw->phy.type == ixgbe_phy_unknown) { + for (phy_addr = 0; phy_addr < IXGBE_MAX_PHY_ADDR; phy_addr++) { + if (ixgbe_validate_phy_addr(hw, phy_addr)) { + hw->phy.addr = phy_addr; + ixgbe_get_phy_id(hw); + hw->phy.type = + ixgbe_get_phy_type_from_id(hw->phy.id); + + if (hw->phy.type == ixgbe_phy_unknown) { + hw->phy.ops.read_reg(hw, + IXGBE_MDIO_PHY_EXT_ABILITY, + IXGBE_MDIO_PMA_PMD_DEV_TYPE, + &ext_ability); + if (ext_ability & + (IXGBE_MDIO_PHY_10GBASET_ABILITY | + IXGBE_MDIO_PHY_1000BASET_ABILITY)) + hw->phy.type = + ixgbe_phy_cu_unknown; + else + hw->phy.type = + ixgbe_phy_generic; + } + + status = 0; + break; + } + } + /* clear value if nothing found */ + if (status != 0) + hw->phy.addr = 0; + } else { + status = 0; + } + + return status; +} + +/** + * ixgbe_validate_phy_addr - Determines phy address is valid + * @hw: pointer to hardware structure + * + **/ +bool ixgbe_validate_phy_addr(struct ixgbe_hw *hw, u32 phy_addr) +{ + u16 phy_id = 0; + bool valid = false; + + hw->phy.addr = phy_addr; + hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_ID_HIGH, + IXGBE_MDIO_PMA_PMD_DEV_TYPE, &phy_id); + + if (phy_id != 0xFFFF && phy_id != 0x0) + valid = true; + + return valid; +} + +/** + * ixgbe_get_phy_id - Get the phy type + * @hw: pointer to hardware structure + * + **/ +s32 ixgbe_get_phy_id(struct ixgbe_hw *hw) +{ + u32 status; + u16 phy_id_high = 0; + u16 phy_id_low = 0; + + status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_ID_HIGH, + IXGBE_MDIO_PMA_PMD_DEV_TYPE, + &phy_id_high); + + if (status == 0) { + hw->phy.id = (u32)(phy_id_high << 16); + status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_ID_LOW, + IXGBE_MDIO_PMA_PMD_DEV_TYPE, + &phy_id_low); + hw->phy.id |= (u32)(phy_id_low & IXGBE_PHY_REVISION_MASK); + hw->phy.revision = (u32)(phy_id_low & ~IXGBE_PHY_REVISION_MASK); + } + return status; +} + +/** + * ixgbe_get_phy_type_from_id - Get the phy type + * @hw: pointer to hardware structure + * + **/ +enum ixgbe_phy_type ixgbe_get_phy_type_from_id(u32 phy_id) +{ + enum ixgbe_phy_type phy_type; + + switch (phy_id) { + case TN1010_PHY_ID: + phy_type = ixgbe_phy_tn; + break; + case X540_PHY_ID: + phy_type = ixgbe_phy_aq; + break; + case QT2022_PHY_ID: + phy_type = ixgbe_phy_qt; + break; + case ATH_PHY_ID: + phy_type = ixgbe_phy_nl; + break; + default: + phy_type = ixgbe_phy_unknown; + break; + } + + hw_dbg(hw, "phy type found is %d\n", phy_type); + return phy_type; +} + +/** + * ixgbe_reset_phy_generic - Performs a PHY reset + * @hw: pointer to hardware structure + **/ +s32 ixgbe_reset_phy_generic(struct ixgbe_hw *hw) +{ + u32 i; + u16 ctrl = 0; + s32 status = 0; + + if (hw->phy.type == ixgbe_phy_unknown) + status = ixgbe_identify_phy_generic(hw); + + if (status != 0 || hw->phy.type == ixgbe_phy_none) + goto out; + + /* Don't reset PHY if it's shut down due to overtemp. */ + if (!hw->phy.reset_if_overtemp && + (IXGBE_ERR_OVERTEMP == hw->phy.ops.check_overtemp(hw))) + goto out; + + /* + * Perform soft PHY reset to the PHY_XS. + * This will cause a soft reset to the PHY + */ + hw->phy.ops.write_reg(hw, IXGBE_MDIO_PHY_XS_CONTROL, + IXGBE_MDIO_PHY_XS_DEV_TYPE, + IXGBE_MDIO_PHY_XS_RESET); + + /* + * Poll for reset bit to self-clear indicating reset is complete. + * Some PHYs could take up to 3 seconds to complete and need about + * 1.7 usec delay after the reset is complete. + */ + for (i = 0; i < 30; i++) { + msleep(100); + hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_XS_CONTROL, + IXGBE_MDIO_PHY_XS_DEV_TYPE, &ctrl); + if (!(ctrl & IXGBE_MDIO_PHY_XS_RESET)) { + udelay(2); + break; + } + } + + if (ctrl & IXGBE_MDIO_PHY_XS_RESET) { + status = IXGBE_ERR_RESET_FAILED; + hw_dbg(hw, "PHY reset polling failed to complete.\n"); + } + +out: + return status; +} + +/** + * ixgbe_read_phy_reg_generic - Reads a value from a specified PHY register + * @hw: pointer to hardware structure + * @reg_addr: 32 bit address of PHY register to read + * @phy_data: Pointer to read data from PHY register + **/ +s32 ixgbe_read_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr, + u32 device_type, u16 *phy_data) +{ + u32 command; + u32 i; + u32 data; + s32 status = 0; + u16 gssr; + + if (IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_LAN_ID_1) + gssr = IXGBE_GSSR_PHY1_SM; + else + gssr = IXGBE_GSSR_PHY0_SM; + + if (hw->mac.ops.acquire_swfw_sync(hw, gssr) != 0) + status = IXGBE_ERR_SWFW_SYNC; + + if (status == 0) { + /* Setup and write the address cycle command */ + command = ((reg_addr << IXGBE_MSCA_NP_ADDR_SHIFT) | + (device_type << IXGBE_MSCA_DEV_TYPE_SHIFT) | + (hw->phy.addr << IXGBE_MSCA_PHY_ADDR_SHIFT) | + (IXGBE_MSCA_ADDR_CYCLE | IXGBE_MSCA_MDI_COMMAND)); + + IXGBE_WRITE_REG(hw, IXGBE_MSCA, command); + + /* + * Check every 10 usec to see if the address cycle completed. + * The MDI Command bit will clear when the operation is + * complete + */ + for (i = 0; i < IXGBE_MDIO_COMMAND_TIMEOUT; i++) { + udelay(10); + + command = IXGBE_READ_REG(hw, IXGBE_MSCA); + + if ((command & IXGBE_MSCA_MDI_COMMAND) == 0) + break; + } + + if ((command & IXGBE_MSCA_MDI_COMMAND) != 0) { + hw_dbg(hw, "PHY address command did not complete.\n"); + status = IXGBE_ERR_PHY; + } + + if (status == 0) { + /* + * Address cycle complete, setup and write the read + * command + */ + command = ((reg_addr << IXGBE_MSCA_NP_ADDR_SHIFT) | + (device_type << IXGBE_MSCA_DEV_TYPE_SHIFT) | + (hw->phy.addr << IXGBE_MSCA_PHY_ADDR_SHIFT) | + (IXGBE_MSCA_READ | IXGBE_MSCA_MDI_COMMAND)); + + IXGBE_WRITE_REG(hw, IXGBE_MSCA, command); + + /* + * Check every 10 usec to see if the address cycle + * completed. The MDI Command bit will clear when the + * operation is complete + */ + for (i = 0; i < IXGBE_MDIO_COMMAND_TIMEOUT; i++) { + udelay(10); + + command = IXGBE_READ_REG(hw, IXGBE_MSCA); + + if ((command & IXGBE_MSCA_MDI_COMMAND) == 0) + break; + } + + if ((command & IXGBE_MSCA_MDI_COMMAND) != 0) { + hw_dbg(hw, "PHY read command didn't complete\n"); + status = IXGBE_ERR_PHY; + } else { + /* + * Read operation is complete. Get the data + * from MSRWD + */ + data = IXGBE_READ_REG(hw, IXGBE_MSRWD); + data >>= IXGBE_MSRWD_READ_DATA_SHIFT; + *phy_data = (u16)(data); + } + } + + hw->mac.ops.release_swfw_sync(hw, gssr); + } + + return status; +} + +/** + * ixgbe_write_phy_reg_generic - Writes a value to specified PHY register + * @hw: pointer to hardware structure + * @reg_addr: 32 bit PHY register to write + * @device_type: 5 bit device type + * @phy_data: Data to write to the PHY register + **/ +s32 ixgbe_write_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr, + u32 device_type, u16 phy_data) +{ + u32 command; + u32 i; + s32 status = 0; + u16 gssr; + + if (IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_LAN_ID_1) + gssr = IXGBE_GSSR_PHY1_SM; + else + gssr = IXGBE_GSSR_PHY0_SM; + + if (hw->mac.ops.acquire_swfw_sync(hw, gssr) != 0) + status = IXGBE_ERR_SWFW_SYNC; + + if (status == 0) { + /* Put the data in the MDI single read and write data register*/ + IXGBE_WRITE_REG(hw, IXGBE_MSRWD, (u32)phy_data); + + /* Setup and write the address cycle command */ + command = ((reg_addr << IXGBE_MSCA_NP_ADDR_SHIFT) | + (device_type << IXGBE_MSCA_DEV_TYPE_SHIFT) | + (hw->phy.addr << IXGBE_MSCA_PHY_ADDR_SHIFT) | + (IXGBE_MSCA_ADDR_CYCLE | IXGBE_MSCA_MDI_COMMAND)); + + IXGBE_WRITE_REG(hw, IXGBE_MSCA, command); + + /* + * Check every 10 usec to see if the address cycle completed. + * The MDI Command bit will clear when the operation is + * complete + */ + for (i = 0; i < IXGBE_MDIO_COMMAND_TIMEOUT; i++) { + udelay(10); + + command = IXGBE_READ_REG(hw, IXGBE_MSCA); + + if ((command & IXGBE_MSCA_MDI_COMMAND) == 0) + break; + } + + if ((command & IXGBE_MSCA_MDI_COMMAND) != 0) { + hw_dbg(hw, "PHY address cmd didn't complete\n"); + status = IXGBE_ERR_PHY; + } + + if (status == 0) { + /* + * Address cycle complete, setup and write the write + * command + */ + command = ((reg_addr << IXGBE_MSCA_NP_ADDR_SHIFT) | + (device_type << IXGBE_MSCA_DEV_TYPE_SHIFT) | + (hw->phy.addr << IXGBE_MSCA_PHY_ADDR_SHIFT) | + (IXGBE_MSCA_WRITE | IXGBE_MSCA_MDI_COMMAND)); + + IXGBE_WRITE_REG(hw, IXGBE_MSCA, command); + + /* + * Check every 10 usec to see if the address cycle + * completed. The MDI Command bit will clear when the + * operation is complete + */ + for (i = 0; i < IXGBE_MDIO_COMMAND_TIMEOUT; i++) { + udelay(10); + + command = IXGBE_READ_REG(hw, IXGBE_MSCA); + + if ((command & IXGBE_MSCA_MDI_COMMAND) == 0) + break; + } + + if ((command & IXGBE_MSCA_MDI_COMMAND) != 0) { + hw_dbg(hw, "PHY address cmd didn't complete\n"); + status = IXGBE_ERR_PHY; + } + } + + hw->mac.ops.release_swfw_sync(hw, gssr); + } + + return status; +} + +/** + * ixgbe_setup_phy_link_generic - Set and restart autoneg + * @hw: pointer to hardware structure + * + * Restart autonegotiation and PHY and waits for completion. + **/ +s32 ixgbe_setup_phy_link_generic(struct ixgbe_hw *hw) +{ + s32 status = 0; + u32 time_out; + u32 max_time_out = 10; + u16 autoneg_reg = IXGBE_MII_AUTONEG_REG; + bool autoneg = false; + ixgbe_link_speed speed; + + ixgbe_get_copper_link_capabilities_generic(hw, &speed, &autoneg); + + if (speed & IXGBE_LINK_SPEED_10GB_FULL) { + /* Set or unset auto-negotiation 10G advertisement */ + hw->phy.ops.read_reg(hw, IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + &autoneg_reg); + + autoneg_reg &= ~IXGBE_MII_10GBASE_T_ADVERTISE; + if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_10GB_FULL) + autoneg_reg |= IXGBE_MII_10GBASE_T_ADVERTISE; + + hw->phy.ops.write_reg(hw, IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + autoneg_reg); + } + + if (speed & IXGBE_LINK_SPEED_1GB_FULL) { + /* Set or unset auto-negotiation 1G advertisement */ + hw->phy.ops.read_reg(hw, + IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + &autoneg_reg); + + autoneg_reg &= ~IXGBE_MII_1GBASE_T_ADVERTISE; + if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_1GB_FULL) + autoneg_reg |= IXGBE_MII_1GBASE_T_ADVERTISE; + + hw->phy.ops.write_reg(hw, + IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + autoneg_reg); + } + + if (speed & IXGBE_LINK_SPEED_100_FULL) { + /* Set or unset auto-negotiation 100M advertisement */ + hw->phy.ops.read_reg(hw, IXGBE_MII_AUTONEG_ADVERTISE_REG, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + &autoneg_reg); + + autoneg_reg &= ~(IXGBE_MII_100BASE_T_ADVERTISE | + IXGBE_MII_100BASE_T_ADVERTISE_HALF); + if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_100_FULL) + autoneg_reg |= IXGBE_MII_100BASE_T_ADVERTISE; + + hw->phy.ops.write_reg(hw, IXGBE_MII_AUTONEG_ADVERTISE_REG, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + autoneg_reg); + } + + /* Restart PHY autonegotiation and wait for completion */ + hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &autoneg_reg); + + autoneg_reg |= IXGBE_MII_RESTART; + + hw->phy.ops.write_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, autoneg_reg); + + /* Wait for autonegotiation to finish */ + for (time_out = 0; time_out < max_time_out; time_out++) { + udelay(10); + /* Restart PHY autonegotiation and wait for completion */ + status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_STATUS, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + &autoneg_reg); + + autoneg_reg &= IXGBE_MII_AUTONEG_COMPLETE; + if (autoneg_reg == IXGBE_MII_AUTONEG_COMPLETE) + break; + } + + if (time_out == max_time_out) { + status = IXGBE_ERR_LINK_SETUP; + hw_dbg(hw, "ixgbe_setup_phy_link_generic: time out"); + } + + return status; +} + +/** + * ixgbe_setup_phy_link_speed_generic - Sets the auto advertised capabilities + * @hw: pointer to hardware structure + * @speed: new link speed + * @autoneg: true if autonegotiation enabled + **/ +s32 ixgbe_setup_phy_link_speed_generic(struct ixgbe_hw *hw, + ixgbe_link_speed speed, + bool autoneg, + bool autoneg_wait_to_complete) +{ + + /* + * Clear autoneg_advertised and set new values based on input link + * speed. + */ + hw->phy.autoneg_advertised = 0; + + if (speed & IXGBE_LINK_SPEED_10GB_FULL) + hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_10GB_FULL; + + if (speed & IXGBE_LINK_SPEED_1GB_FULL) + hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_1GB_FULL; + + if (speed & IXGBE_LINK_SPEED_100_FULL) + hw->phy.autoneg_advertised |= IXGBE_LINK_SPEED_100_FULL; + + /* Setup link based on the new speed settings */ + hw->phy.ops.setup_link(hw); + + return 0; +} + +/** + * ixgbe_get_copper_link_capabilities_generic - Determines link capabilities + * @hw: pointer to hardware structure + * @speed: pointer to link speed + * @autoneg: boolean auto-negotiation value + * + * Determines the link capabilities by reading the AUTOC register. + **/ +s32 ixgbe_get_copper_link_capabilities_generic(struct ixgbe_hw *hw, + ixgbe_link_speed *speed, + bool *autoneg) +{ + s32 status = IXGBE_ERR_LINK_SETUP; + u16 speed_ability; + + *speed = 0; + *autoneg = true; + + status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_SPEED_ABILITY, + IXGBE_MDIO_PMA_PMD_DEV_TYPE, + &speed_ability); + + if (status == 0) { + if (speed_ability & IXGBE_MDIO_PHY_SPEED_10G) + *speed |= IXGBE_LINK_SPEED_10GB_FULL; + if (speed_ability & IXGBE_MDIO_PHY_SPEED_1G) + *speed |= IXGBE_LINK_SPEED_1GB_FULL; + if (speed_ability & IXGBE_MDIO_PHY_SPEED_100M) + *speed |= IXGBE_LINK_SPEED_100_FULL; + } + + return status; +} + +/** + * ixgbe_check_phy_link_tnx - Determine link and speed status + * @hw: pointer to hardware structure + * + * Reads the VS1 register to determine if link is up and the current speed for + * the PHY. + **/ +s32 ixgbe_check_phy_link_tnx(struct ixgbe_hw *hw, ixgbe_link_speed *speed, + bool *link_up) +{ + s32 status = 0; + u32 time_out; + u32 max_time_out = 10; + u16 phy_link = 0; + u16 phy_speed = 0; + u16 phy_data = 0; + + /* Initialize speed and link to default case */ + *link_up = false; + *speed = IXGBE_LINK_SPEED_10GB_FULL; + + /* + * Check current speed and link status of the PHY register. + * This is a vendor specific register and may have to + * be changed for other copper PHYs. + */ + for (time_out = 0; time_out < max_time_out; time_out++) { + udelay(10); + status = hw->phy.ops.read_reg(hw, + IXGBE_MDIO_VENDOR_SPECIFIC_1_STATUS, + IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, + &phy_data); + phy_link = phy_data & IXGBE_MDIO_VENDOR_SPECIFIC_1_LINK_STATUS; + phy_speed = phy_data & + IXGBE_MDIO_VENDOR_SPECIFIC_1_SPEED_STATUS; + if (phy_link == IXGBE_MDIO_VENDOR_SPECIFIC_1_LINK_STATUS) { + *link_up = true; + if (phy_speed == + IXGBE_MDIO_VENDOR_SPECIFIC_1_SPEED_STATUS) + *speed = IXGBE_LINK_SPEED_1GB_FULL; + break; + } + } + + return status; +} + +/** + * ixgbe_setup_phy_link_tnx - Set and restart autoneg + * @hw: pointer to hardware structure + * + * Restart autonegotiation and PHY and waits for completion. + **/ +s32 ixgbe_setup_phy_link_tnx(struct ixgbe_hw *hw) +{ + s32 status = 0; + u32 time_out; + u32 max_time_out = 10; + u16 autoneg_reg = IXGBE_MII_AUTONEG_REG; + bool autoneg = false; + ixgbe_link_speed speed; + + ixgbe_get_copper_link_capabilities_generic(hw, &speed, &autoneg); + + if (speed & IXGBE_LINK_SPEED_10GB_FULL) { + /* Set or unset auto-negotiation 10G advertisement */ + hw->phy.ops.read_reg(hw, IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + &autoneg_reg); + + autoneg_reg &= ~IXGBE_MII_10GBASE_T_ADVERTISE; + if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_10GB_FULL) + autoneg_reg |= IXGBE_MII_10GBASE_T_ADVERTISE; + + hw->phy.ops.write_reg(hw, IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + autoneg_reg); + } + + if (speed & IXGBE_LINK_SPEED_1GB_FULL) { + /* Set or unset auto-negotiation 1G advertisement */ + hw->phy.ops.read_reg(hw, IXGBE_MII_AUTONEG_XNP_TX_REG, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + &autoneg_reg); + + autoneg_reg &= ~IXGBE_MII_1GBASE_T_ADVERTISE_XNP_TX; + if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_1GB_FULL) + autoneg_reg |= IXGBE_MII_1GBASE_T_ADVERTISE_XNP_TX; + + hw->phy.ops.write_reg(hw, IXGBE_MII_AUTONEG_XNP_TX_REG, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + autoneg_reg); + } + + if (speed & IXGBE_LINK_SPEED_100_FULL) { + /* Set or unset auto-negotiation 100M advertisement */ + hw->phy.ops.read_reg(hw, IXGBE_MII_AUTONEG_ADVERTISE_REG, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + &autoneg_reg); + + autoneg_reg &= ~IXGBE_MII_100BASE_T_ADVERTISE; + if (hw->phy.autoneg_advertised & IXGBE_LINK_SPEED_100_FULL) + autoneg_reg |= IXGBE_MII_100BASE_T_ADVERTISE; + + hw->phy.ops.write_reg(hw, IXGBE_MII_AUTONEG_ADVERTISE_REG, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + autoneg_reg); + } + + /* Restart PHY autonegotiation and wait for completion */ + hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, &autoneg_reg); + + autoneg_reg |= IXGBE_MII_RESTART; + + hw->phy.ops.write_reg(hw, IXGBE_MDIO_AUTO_NEG_CONTROL, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, autoneg_reg); + + /* Wait for autonegotiation to finish */ + for (time_out = 0; time_out < max_time_out; time_out++) { + udelay(10); + /* Restart PHY autonegotiation and wait for completion */ + status = hw->phy.ops.read_reg(hw, IXGBE_MDIO_AUTO_NEG_STATUS, + IXGBE_MDIO_AUTO_NEG_DEV_TYPE, + &autoneg_reg); + + autoneg_reg &= IXGBE_MII_AUTONEG_COMPLETE; + if (autoneg_reg == IXGBE_MII_AUTONEG_COMPLETE) + break; + } + + if (time_out == max_time_out) { + status = IXGBE_ERR_LINK_SETUP; + hw_dbg(hw, "ixgbe_setup_phy_link_tnx: time out"); + } + + return status; +} + +/** + * ixgbe_get_phy_firmware_version_tnx - Gets the PHY Firmware Version + * @hw: pointer to hardware structure + * @firmware_version: pointer to the PHY Firmware Version + **/ +s32 ixgbe_get_phy_firmware_version_tnx(struct ixgbe_hw *hw, + u16 *firmware_version) +{ + s32 status = 0; + + status = hw->phy.ops.read_reg(hw, TNX_FW_REV, + IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, + firmware_version); + + return status; +} + +/** + * ixgbe_get_phy_firmware_version_generic - Gets the PHY Firmware Version + * @hw: pointer to hardware structure + * @firmware_version: pointer to the PHY Firmware Version + **/ +s32 ixgbe_get_phy_firmware_version_generic(struct ixgbe_hw *hw, + u16 *firmware_version) +{ + s32 status = 0; + + status = hw->phy.ops.read_reg(hw, AQ_FW_REV, + IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE, + firmware_version); + + return status; +} + +/** + * ixgbe_reset_phy_nl - Performs a PHY reset + * @hw: pointer to hardware structure + **/ +s32 ixgbe_reset_phy_nl(struct ixgbe_hw *hw) +{ + u16 phy_offset, control, eword, edata, block_crc; + bool end_data = false; + u16 list_offset, data_offset; + u16 phy_data = 0; + s32 ret_val = 0; + u32 i; + + hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_XS_CONTROL, + IXGBE_MDIO_PHY_XS_DEV_TYPE, &phy_data); + + /* reset the PHY and poll for completion */ + hw->phy.ops.write_reg(hw, IXGBE_MDIO_PHY_XS_CONTROL, + IXGBE_MDIO_PHY_XS_DEV_TYPE, + (phy_data | IXGBE_MDIO_PHY_XS_RESET)); + + for (i = 0; i < 100; i++) { + hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_XS_CONTROL, + IXGBE_MDIO_PHY_XS_DEV_TYPE, &phy_data); + if ((phy_data & IXGBE_MDIO_PHY_XS_RESET) == 0) + break; + msleep(10); + } + + if ((phy_data & IXGBE_MDIO_PHY_XS_RESET) != 0) { + hw_dbg(hw, "PHY reset did not complete.\n"); + ret_val = IXGBE_ERR_PHY; + goto out; + } + + /* Get init offsets */ + ret_val = ixgbe_get_sfp_init_sequence_offsets(hw, &list_offset, + &data_offset); + if (ret_val != 0) + goto out; + + ret_val = hw->eeprom.ops.read(hw, data_offset, &block_crc); + data_offset++; + while (!end_data) { + /* + * Read control word from PHY init contents offset + */ + ret_val = hw->eeprom.ops.read(hw, data_offset, &eword); + control = (eword & IXGBE_CONTROL_MASK_NL) >> + IXGBE_CONTROL_SHIFT_NL; + edata = eword & IXGBE_DATA_MASK_NL; + switch (control) { + case IXGBE_DELAY_NL: + data_offset++; + hw_dbg(hw, "DELAY: %d MS\n", edata); + msleep(edata); + break; + case IXGBE_DATA_NL: + hw_dbg(hw, "DATA:\n"); + data_offset++; + hw->eeprom.ops.read(hw, data_offset++, + &phy_offset); + for (i = 0; i < edata; i++) { + hw->eeprom.ops.read(hw, data_offset, &eword); + hw->phy.ops.write_reg(hw, phy_offset, + IXGBE_TWINAX_DEV, eword); + hw_dbg(hw, "Wrote %4.4x to %4.4x\n", eword, + phy_offset); + data_offset++; + phy_offset++; + } + break; + case IXGBE_CONTROL_NL: + data_offset++; + hw_dbg(hw, "CONTROL:\n"); + if (edata == IXGBE_CONTROL_EOL_NL) { + hw_dbg(hw, "EOL\n"); + end_data = true; + } else if (edata == IXGBE_CONTROL_SOL_NL) { + hw_dbg(hw, "SOL\n"); + } else { + hw_dbg(hw, "Bad control value\n"); + ret_val = IXGBE_ERR_PHY; + goto out; + } + break; + default: + hw_dbg(hw, "Bad control type\n"); + ret_val = IXGBE_ERR_PHY; + goto out; + } + } + +out: + return ret_val; +} + +/** + * ixgbe_identify_module_generic - Identifies module type + * @hw: pointer to hardware structure + * + * Determines HW type and calls appropriate function. + **/ +s32 ixgbe_identify_module_generic(struct ixgbe_hw *hw) +{ + s32 status = IXGBE_ERR_SFP_NOT_PRESENT; + + switch (hw->mac.ops.get_media_type(hw)) { + case ixgbe_media_type_fiber: + status = ixgbe_identify_sfp_module_generic(hw); + break; + + case ixgbe_media_type_fiber_qsfp: + status = ixgbe_identify_qsfp_module_generic(hw); + break; + + default: + hw->phy.sfp_type = ixgbe_sfp_type_not_present; + status = IXGBE_ERR_SFP_NOT_PRESENT; + break; + } + + return status; +} + +/** + * ixgbe_identify_sfp_module_generic - Identifies SFP modules + * @hw: pointer to hardware structure + * + * Searches for and identifies the SFP module and assigns appropriate PHY type. + **/ +s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw) +{ + s32 status = IXGBE_ERR_PHY_ADDR_INVALID; + u32 vendor_oui = 0; + enum ixgbe_sfp_type stored_sfp_type = hw->phy.sfp_type; + u8 identifier = 0; + u8 comp_codes_1g = 0; + u8 comp_codes_10g = 0; + u8 oui_bytes[3] = {0, 0, 0}; + u8 cable_tech = 0; + u8 cable_spec = 0; + u16 enforce_sfp = 0; + + if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_fiber) { + hw->phy.sfp_type = ixgbe_sfp_type_not_present; + status = IXGBE_ERR_SFP_NOT_PRESENT; + goto out; + } + + status = hw->phy.ops.read_i2c_eeprom(hw, + IXGBE_SFF_IDENTIFIER, + &identifier); + + if (status == IXGBE_ERR_SWFW_SYNC || + status == IXGBE_ERR_I2C || + status == IXGBE_ERR_SFP_NOT_PRESENT) + goto err_read_i2c_eeprom; + + /* LAN ID is needed for sfp_type determination */ + hw->mac.ops.set_lan_id(hw); + + if (identifier != IXGBE_SFF_IDENTIFIER_SFP) { + hw->phy.type = ixgbe_phy_sfp_unsupported; + status = IXGBE_ERR_SFP_NOT_SUPPORTED; + } else { + status = hw->phy.ops.read_i2c_eeprom(hw, + IXGBE_SFF_1GBE_COMP_CODES, + &comp_codes_1g); + + if (status == IXGBE_ERR_SWFW_SYNC || + status == IXGBE_ERR_I2C || + status == IXGBE_ERR_SFP_NOT_PRESENT) + goto err_read_i2c_eeprom; + + status = hw->phy.ops.read_i2c_eeprom(hw, + IXGBE_SFF_10GBE_COMP_CODES, + &comp_codes_10g); + + if (status == IXGBE_ERR_SWFW_SYNC || + status == IXGBE_ERR_I2C || + status == IXGBE_ERR_SFP_NOT_PRESENT) + goto err_read_i2c_eeprom; + status = hw->phy.ops.read_i2c_eeprom(hw, + IXGBE_SFF_CABLE_TECHNOLOGY, + &cable_tech); + + if (status == IXGBE_ERR_SWFW_SYNC || + status == IXGBE_ERR_I2C || + status == IXGBE_ERR_SFP_NOT_PRESENT) + goto err_read_i2c_eeprom; + + /* ID Module + * ========= + * 0 SFP_DA_CU + * 1 SFP_SR + * 2 SFP_LR + * 3 SFP_DA_CORE0 - 82599-specific + * 4 SFP_DA_CORE1 - 82599-specific + * 5 SFP_SR/LR_CORE0 - 82599-specific + * 6 SFP_SR/LR_CORE1 - 82599-specific + * 7 SFP_act_lmt_DA_CORE0 - 82599-specific + * 8 SFP_act_lmt_DA_CORE1 - 82599-specific + * 9 SFP_1g_cu_CORE0 - 82599-specific + * 10 SFP_1g_cu_CORE1 - 82599-specific + * 11 SFP_1g_sx_CORE0 - 82599-specific + * 12 SFP_1g_sx_CORE1 - 82599-specific + */ + if (hw->mac.type == ixgbe_mac_82598EB) { + if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE) + hw->phy.sfp_type = ixgbe_sfp_type_da_cu; + else if (comp_codes_10g & IXGBE_SFF_10GBASESR_CAPABLE) + hw->phy.sfp_type = ixgbe_sfp_type_sr; + else if (comp_codes_10g & IXGBE_SFF_10GBASELR_CAPABLE) + hw->phy.sfp_type = ixgbe_sfp_type_lr; + else + hw->phy.sfp_type = ixgbe_sfp_type_unknown; + } else if (hw->mac.type == ixgbe_mac_82599EB) { + if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE) { + if (hw->bus.lan_id == 0) + hw->phy.sfp_type = + ixgbe_sfp_type_da_cu_core0; + else + hw->phy.sfp_type = + ixgbe_sfp_type_da_cu_core1; + } else if (cable_tech & IXGBE_SFF_DA_ACTIVE_CABLE) { + hw->phy.ops.read_i2c_eeprom( + hw, IXGBE_SFF_CABLE_SPEC_COMP, + &cable_spec); + if (cable_spec & + IXGBE_SFF_DA_SPEC_ACTIVE_LIMITING) { + if (hw->bus.lan_id == 0) + hw->phy.sfp_type = + ixgbe_sfp_type_da_act_lmt_core0; + else + hw->phy.sfp_type = + ixgbe_sfp_type_da_act_lmt_core1; + } else { + hw->phy.sfp_type = + ixgbe_sfp_type_unknown; + } + } else if (comp_codes_10g & + (IXGBE_SFF_10GBASESR_CAPABLE | + IXGBE_SFF_10GBASELR_CAPABLE)) { + if (hw->bus.lan_id == 0) + hw->phy.sfp_type = + ixgbe_sfp_type_srlr_core0; + else + hw->phy.sfp_type = + ixgbe_sfp_type_srlr_core1; + } else if (comp_codes_1g & IXGBE_SFF_1GBASET_CAPABLE) { + if (hw->bus.lan_id == 0) + hw->phy.sfp_type = + ixgbe_sfp_type_1g_cu_core0; + else + hw->phy.sfp_type = + ixgbe_sfp_type_1g_cu_core1; + } else if (comp_codes_1g & IXGBE_SFF_1GBASESX_CAPABLE) { + if (hw->bus.lan_id == 0) + hw->phy.sfp_type = + ixgbe_sfp_type_1g_sx_core0; + else + hw->phy.sfp_type = + ixgbe_sfp_type_1g_sx_core1; + } else { + hw->phy.sfp_type = ixgbe_sfp_type_unknown; + } + } + + if (hw->phy.sfp_type != stored_sfp_type) + hw->phy.sfp_setup_needed = true; + + /* Determine if the SFP+ PHY is dual speed or not. */ + hw->phy.multispeed_fiber = false; + if (((comp_codes_1g & IXGBE_SFF_1GBASESX_CAPABLE) && + (comp_codes_10g & IXGBE_SFF_10GBASESR_CAPABLE)) || + ((comp_codes_1g & IXGBE_SFF_1GBASELX_CAPABLE) && + (comp_codes_10g & IXGBE_SFF_10GBASELR_CAPABLE))) + hw->phy.multispeed_fiber = true; + + /* Determine PHY vendor */ + if (hw->phy.type != ixgbe_phy_nl) { + hw->phy.id = identifier; + status = hw->phy.ops.read_i2c_eeprom(hw, + IXGBE_SFF_VENDOR_OUI_BYTE0, + &oui_bytes[0]); + + if (status == IXGBE_ERR_SWFW_SYNC || + status == IXGBE_ERR_I2C || + status == IXGBE_ERR_SFP_NOT_PRESENT) + goto err_read_i2c_eeprom; + + status = hw->phy.ops.read_i2c_eeprom(hw, + IXGBE_SFF_VENDOR_OUI_BYTE1, + &oui_bytes[1]); + + if (status == IXGBE_ERR_SWFW_SYNC || + status == IXGBE_ERR_I2C || + status == IXGBE_ERR_SFP_NOT_PRESENT) + goto err_read_i2c_eeprom; + + status = hw->phy.ops.read_i2c_eeprom(hw, + IXGBE_SFF_VENDOR_OUI_BYTE2, + &oui_bytes[2]); + + if (status == IXGBE_ERR_SWFW_SYNC || + status == IXGBE_ERR_I2C || + status == IXGBE_ERR_SFP_NOT_PRESENT) + goto err_read_i2c_eeprom; + + vendor_oui = + ((oui_bytes[0] << IXGBE_SFF_VENDOR_OUI_BYTE0_SHIFT) | + (oui_bytes[1] << IXGBE_SFF_VENDOR_OUI_BYTE1_SHIFT) | + (oui_bytes[2] << IXGBE_SFF_VENDOR_OUI_BYTE2_SHIFT)); + + switch (vendor_oui) { + case IXGBE_SFF_VENDOR_OUI_TYCO: + if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE) + hw->phy.type = + ixgbe_phy_sfp_passive_tyco; + break; + case IXGBE_SFF_VENDOR_OUI_FTL: + if (cable_tech & IXGBE_SFF_DA_ACTIVE_CABLE) + hw->phy.type = ixgbe_phy_sfp_ftl_active; + else + hw->phy.type = ixgbe_phy_sfp_ftl; + break; + case IXGBE_SFF_VENDOR_OUI_AVAGO: + hw->phy.type = ixgbe_phy_sfp_avago; + break; + case IXGBE_SFF_VENDOR_OUI_INTEL: + hw->phy.type = ixgbe_phy_sfp_intel; + break; + default: + if (cable_tech & IXGBE_SFF_DA_PASSIVE_CABLE) + hw->phy.type = + ixgbe_phy_sfp_passive_unknown; + else if (cable_tech & IXGBE_SFF_DA_ACTIVE_CABLE) + hw->phy.type = + ixgbe_phy_sfp_active_unknown; + else + hw->phy.type = ixgbe_phy_sfp_unknown; + break; + } + } + + /* Allow any DA cable vendor */ + if (cable_tech & (IXGBE_SFF_DA_PASSIVE_CABLE | + IXGBE_SFF_DA_ACTIVE_CABLE)) { + status = 0; + goto out; + } + + /* Verify supported 1G SFP modules */ + if (comp_codes_10g == 0 && + !(hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core1 || + hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core0 || + hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0 || + hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1)) { + hw->phy.type = ixgbe_phy_sfp_unsupported; + status = IXGBE_ERR_SFP_NOT_SUPPORTED; + goto out; + } + + /* Anything else 82598-based is supported */ + if (hw->mac.type == ixgbe_mac_82598EB) { + status = 0; + goto out; + } + + ixgbe_get_device_caps(hw, &enforce_sfp); + if (!(enforce_sfp & IXGBE_DEVICE_CAPS_ALLOW_ANY_SFP) && + !((hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core0) || + (hw->phy.sfp_type == ixgbe_sfp_type_1g_cu_core1) || + (hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core0) || + (hw->phy.sfp_type == ixgbe_sfp_type_1g_sx_core1))) { + /* Make sure we're a supported PHY type */ + if (hw->phy.type == ixgbe_phy_sfp_intel) { + status = 0; + } else { + if (hw->allow_unsupported_sfp == true) { + EWARN(hw, "WARNING: Intel (R) Network " + "Connections are quality tested " + "using Intel (R) Ethernet Optics." + " Using untested modules is not " + "supported and may cause unstable" + " operation or damage to the " + "module or the adapter. Intel " + "Corporation is not responsible " + "for any harm caused by using " + "untested modules.\n", status); + status = 0; + } else { + hw_dbg(hw, "SFP+ module not supported\n"); + hw->phy.type = + ixgbe_phy_sfp_unsupported; + status = IXGBE_ERR_SFP_NOT_SUPPORTED; + } + } + } else { + status = 0; + } + } + +out: + return status; + +err_read_i2c_eeprom: + hw->phy.sfp_type = ixgbe_sfp_type_not_present; + if (hw->phy.type != ixgbe_phy_nl) { + hw->phy.id = 0; + hw->phy.type = ixgbe_phy_unknown; + } + return IXGBE_ERR_SFP_NOT_PRESENT; +} + +/** + * ixgbe_identify_qsfp_module_generic - Identifies QSFP modules + * @hw: pointer to hardware structure + * + * Searches for and identifies the QSFP module and assigns appropriate PHY type + **/ +s32 ixgbe_identify_qsfp_module_generic(struct ixgbe_hw *hw) +{ + s32 status = 0; + + if (hw->mac.ops.get_media_type(hw) != ixgbe_media_type_fiber_qsfp) { + hw->phy.sfp_type = ixgbe_sfp_type_not_present; + status = IXGBE_ERR_SFP_NOT_PRESENT; + } + + return status; +} + + +/** + * ixgbe_get_sfp_init_sequence_offsets - Provides offset of PHY init sequence + * @hw: pointer to hardware structure + * @list_offset: offset to the SFP ID list + * @data_offset: offset to the SFP data block + * + * Checks the MAC's EEPROM to see if it supports a given SFP+ module type, if + * so it returns the offsets to the phy init sequence block. + **/ +s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw, + u16 *list_offset, + u16 *data_offset) +{ + u16 sfp_id; + u16 sfp_type = hw->phy.sfp_type; + + if (hw->phy.sfp_type == ixgbe_sfp_type_unknown) + return IXGBE_ERR_SFP_NOT_SUPPORTED; + + if (hw->phy.sfp_type == ixgbe_sfp_type_not_present) + return IXGBE_ERR_SFP_NOT_PRESENT; + + if ((hw->device_id == IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM) && + (hw->phy.sfp_type == ixgbe_sfp_type_da_cu)) + return IXGBE_ERR_SFP_NOT_SUPPORTED; + + /* + * Limiting active cables and 1G Phys must be initialized as + * SR modules + */ + if (sfp_type == ixgbe_sfp_type_da_act_lmt_core0 || + sfp_type == ixgbe_sfp_type_1g_cu_core0 || + sfp_type == ixgbe_sfp_type_1g_sx_core0) + sfp_type = ixgbe_sfp_type_srlr_core0; + else if (sfp_type == ixgbe_sfp_type_da_act_lmt_core1 || + sfp_type == ixgbe_sfp_type_1g_cu_core1 || + sfp_type == ixgbe_sfp_type_1g_sx_core1) + sfp_type = ixgbe_sfp_type_srlr_core1; + + /* Read offset to PHY init contents */ + hw->eeprom.ops.read(hw, IXGBE_PHY_INIT_OFFSET_NL, list_offset); + + if ((!*list_offset) || (*list_offset == 0xFFFF)) + return IXGBE_ERR_SFP_NO_INIT_SEQ_PRESENT; + + /* Shift offset to first ID word */ + (*list_offset)++; + + /* + * Find the matching SFP ID in the EEPROM + * and program the init sequence + */ + hw->eeprom.ops.read(hw, *list_offset, &sfp_id); + + while (sfp_id != IXGBE_PHY_INIT_END_NL) { + if (sfp_id == sfp_type) { + (*list_offset)++; + hw->eeprom.ops.read(hw, *list_offset, data_offset); + if ((!*data_offset) || (*data_offset == 0xFFFF)) { + hw_dbg(hw, "SFP+ module not supported\n"); + return IXGBE_ERR_SFP_NOT_SUPPORTED; + } else { + break; + } + } else { + (*list_offset) += 2; + if (hw->eeprom.ops.read(hw, *list_offset, &sfp_id)) + return IXGBE_ERR_PHY; + } + } + + if (sfp_id == IXGBE_PHY_INIT_END_NL) { + hw_dbg(hw, "No matching SFP+ module found\n"); + return IXGBE_ERR_SFP_NOT_SUPPORTED; + } + + return 0; +} + +/** + * ixgbe_read_i2c_eeprom_generic - Reads 8 bit EEPROM word over I2C interface + * @hw: pointer to hardware structure + * @byte_offset: EEPROM byte offset to read + * @eeprom_data: value read + * + * Performs byte read operation to SFP module's EEPROM over I2C interface. + **/ +s32 ixgbe_read_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset, + u8 *eeprom_data) +{ + return hw->phy.ops.read_i2c_byte(hw, byte_offset, + IXGBE_I2C_EEPROM_DEV_ADDR, + eeprom_data); +} + +/** + * ixgbe_write_i2c_eeprom_generic - Writes 8 bit EEPROM word over I2C interface + * @hw: pointer to hardware structure + * @byte_offset: EEPROM byte offset to write + * @eeprom_data: value to write + * + * Performs byte write operation to SFP module's EEPROM over I2C interface. + **/ +s32 ixgbe_write_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset, + u8 eeprom_data) +{ + return hw->phy.ops.write_i2c_byte(hw, byte_offset, + IXGBE_I2C_EEPROM_DEV_ADDR, + eeprom_data); +} + +/** + * ixgbe_read_i2c_byte_generic - Reads 8 bit word over I2C + * @hw: pointer to hardware structure + * @byte_offset: byte offset to read + * @data: value read + * + * Performs byte read operation to SFP module's EEPROM over I2C interface at + * a specified device address. + **/ +s32 ixgbe_read_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset, + u8 dev_addr, u8 *data) +{ + s32 status = 0; + u32 max_retry = 10; + u32 retry = 0; + u16 swfw_mask = 0; + bool nack = 1; + *data = 0; + + if (IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_LAN_ID_1) + swfw_mask = IXGBE_GSSR_PHY1_SM; + else + swfw_mask = IXGBE_GSSR_PHY0_SM; + + do { + if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask) + != 0) { + status = IXGBE_ERR_SWFW_SYNC; + goto read_byte_out; + } + + ixgbe_i2c_start(hw); + + /* Device Address and write indication */ + status = ixgbe_clock_out_i2c_byte(hw, dev_addr); + if (status != 0) + goto fail; + + status = ixgbe_get_i2c_ack(hw); + if (status != 0) + goto fail; + + status = ixgbe_clock_out_i2c_byte(hw, byte_offset); + if (status != 0) + goto fail; + + status = ixgbe_get_i2c_ack(hw); + if (status != 0) + goto fail; + + ixgbe_i2c_start(hw); + + /* Device Address and read indication */ + status = ixgbe_clock_out_i2c_byte(hw, (dev_addr | 0x1)); + if (status != 0) + goto fail; + + status = ixgbe_get_i2c_ack(hw); + if (status != 0) + goto fail; + + status = ixgbe_clock_in_i2c_byte(hw, data); + if (status != 0) + goto fail; + + status = ixgbe_clock_out_i2c_bit(hw, nack); + if (status != 0) + goto fail; + + ixgbe_i2c_stop(hw); + break; + +fail: + hw->mac.ops.release_swfw_sync(hw, swfw_mask); + msleep(100); + ixgbe_i2c_bus_clear(hw); + retry++; + if (retry < max_retry) + hw_dbg(hw, "I2C byte read error - Retrying.\n"); + else + hw_dbg(hw, "I2C byte read error.\n"); + + } while (retry < max_retry); + + hw->mac.ops.release_swfw_sync(hw, swfw_mask); + +read_byte_out: + return status; +} + +/** + * ixgbe_write_i2c_byte_generic - Writes 8 bit word over I2C + * @hw: pointer to hardware structure + * @byte_offset: byte offset to write + * @data: value to write + * + * Performs byte write operation to SFP module's EEPROM over I2C interface at + * a specified device address. + **/ +s32 ixgbe_write_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset, + u8 dev_addr, u8 data) +{ + s32 status = 0; + u32 max_retry = 1; + u32 retry = 0; + u16 swfw_mask = 0; + + if (IXGBE_READ_REG(hw, IXGBE_STATUS) & IXGBE_STATUS_LAN_ID_1) + swfw_mask = IXGBE_GSSR_PHY1_SM; + else + swfw_mask = IXGBE_GSSR_PHY0_SM; + + if (hw->mac.ops.acquire_swfw_sync(hw, swfw_mask) != 0) { + status = IXGBE_ERR_SWFW_SYNC; + goto write_byte_out; + } + + do { + ixgbe_i2c_start(hw); + + status = ixgbe_clock_out_i2c_byte(hw, dev_addr); + if (status != 0) + goto fail; + + status = ixgbe_get_i2c_ack(hw); + if (status != 0) + goto fail; + + status = ixgbe_clock_out_i2c_byte(hw, byte_offset); + if (status != 0) + goto fail; + + status = ixgbe_get_i2c_ack(hw); + if (status != 0) + goto fail; + + status = ixgbe_clock_out_i2c_byte(hw, data); + if (status != 0) + goto fail; + + status = ixgbe_get_i2c_ack(hw); + if (status != 0) + goto fail; + + ixgbe_i2c_stop(hw); + break; + +fail: + ixgbe_i2c_bus_clear(hw); + retry++; + if (retry < max_retry) + hw_dbg(hw, "I2C byte write error - Retrying.\n"); + else + hw_dbg(hw, "I2C byte write error.\n"); + } while (retry < max_retry); + + hw->mac.ops.release_swfw_sync(hw, swfw_mask); + +write_byte_out: + return status; +} + +/** + * ixgbe_i2c_start - Sets I2C start condition + * @hw: pointer to hardware structure + * + * Sets I2C start condition (High -> Low on SDA while SCL is High) + **/ +static void ixgbe_i2c_start(struct ixgbe_hw *hw) +{ + u32 i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL); + + /* Start condition must begin with data and clock high */ + ixgbe_set_i2c_data(hw, &i2cctl, 1); + ixgbe_raise_i2c_clk(hw, &i2cctl); + + /* Setup time for start condition (4.7us) */ + udelay(IXGBE_I2C_T_SU_STA); + + ixgbe_set_i2c_data(hw, &i2cctl, 0); + + /* Hold time for start condition (4us) */ + udelay(IXGBE_I2C_T_HD_STA); + + ixgbe_lower_i2c_clk(hw, &i2cctl); + + /* Minimum low period of clock is 4.7 us */ + udelay(IXGBE_I2C_T_LOW); + +} + +/** + * ixgbe_i2c_stop - Sets I2C stop condition + * @hw: pointer to hardware structure + * + * Sets I2C stop condition (Low -> High on SDA while SCL is High) + **/ +static void ixgbe_i2c_stop(struct ixgbe_hw *hw) +{ + u32 i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL); + + /* Stop condition must begin with data low and clock high */ + ixgbe_set_i2c_data(hw, &i2cctl, 0); + ixgbe_raise_i2c_clk(hw, &i2cctl); + + /* Setup time for stop condition (4us) */ + udelay(IXGBE_I2C_T_SU_STO); + + ixgbe_set_i2c_data(hw, &i2cctl, 1); + + /* bus free time between stop and start (4.7us)*/ + udelay(IXGBE_I2C_T_BUF); +} + +/** + * ixgbe_clock_in_i2c_byte - Clocks in one byte via I2C + * @hw: pointer to hardware structure + * @data: data byte to clock in + * + * Clocks in one byte data via I2C data/clock + **/ +static s32 ixgbe_clock_in_i2c_byte(struct ixgbe_hw *hw, u8 *data) +{ + s32 i; + bool bit = 0; + + for (i = 7; i >= 0; i--) { + ixgbe_clock_in_i2c_bit(hw, &bit); + *data |= bit << i; + } + + return 0; +} + +/** + * ixgbe_clock_out_i2c_byte - Clocks out one byte via I2C + * @hw: pointer to hardware structure + * @data: data byte clocked out + * + * Clocks out one byte data via I2C data/clock + **/ +static s32 ixgbe_clock_out_i2c_byte(struct ixgbe_hw *hw, u8 data) +{ + s32 status = 0; + s32 i; + u32 i2cctl; + bool bit = 0; + + for (i = 7; i >= 0; i--) { + bit = (data >> i) & 0x1; + status = ixgbe_clock_out_i2c_bit(hw, bit); + + if (status != 0) + break; + } + + /* Release SDA line (set high) */ + i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL); + i2cctl |= IXGBE_I2C_DATA_OUT; + IXGBE_WRITE_REG(hw, IXGBE_I2CCTL, i2cctl); + IXGBE_WRITE_FLUSH(hw); + + return status; +} + +/** + * ixgbe_get_i2c_ack - Polls for I2C ACK + * @hw: pointer to hardware structure + * + * Clocks in/out one bit via I2C data/clock + **/ +static s32 ixgbe_get_i2c_ack(struct ixgbe_hw *hw) +{ + s32 status = 0; + u32 i = 0; + u32 i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL); + u32 timeout = 10; + bool ack = 1; + + ixgbe_raise_i2c_clk(hw, &i2cctl); + + + /* Minimum high period of clock is 4us */ + udelay(IXGBE_I2C_T_HIGH); + + /* Poll for ACK. Note that ACK in I2C spec is + * transition from 1 to 0 */ + for (i = 0; i < timeout; i++) { + i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL); + ack = ixgbe_get_i2c_data(&i2cctl); + + udelay(1); + if (ack == 0) + break; + } + + if (ack == 1) { + hw_dbg(hw, "I2C ack was not received.\n"); + status = IXGBE_ERR_I2C; + } + + ixgbe_lower_i2c_clk(hw, &i2cctl); + + /* Minimum low period of clock is 4.7 us */ + udelay(IXGBE_I2C_T_LOW); + + return status; +} + +/** + * ixgbe_clock_in_i2c_bit - Clocks in one bit via I2C data/clock + * @hw: pointer to hardware structure + * @data: read data value + * + * Clocks in one bit via I2C data/clock + **/ +static s32 ixgbe_clock_in_i2c_bit(struct ixgbe_hw *hw, bool *data) +{ + u32 i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL); + + ixgbe_raise_i2c_clk(hw, &i2cctl); + + /* Minimum high period of clock is 4us */ + udelay(IXGBE_I2C_T_HIGH); + + i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL); + *data = ixgbe_get_i2c_data(&i2cctl); + + ixgbe_lower_i2c_clk(hw, &i2cctl); + + /* Minimum low period of clock is 4.7 us */ + udelay(IXGBE_I2C_T_LOW); + + return 0; +} + +/** + * ixgbe_clock_out_i2c_bit - Clocks in/out one bit via I2C data/clock + * @hw: pointer to hardware structure + * @data: data value to write + * + * Clocks out one bit via I2C data/clock + **/ +static s32 ixgbe_clock_out_i2c_bit(struct ixgbe_hw *hw, bool data) +{ + s32 status; + u32 i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL); + + status = ixgbe_set_i2c_data(hw, &i2cctl, data); + if (status == 0) { + ixgbe_raise_i2c_clk(hw, &i2cctl); + + /* Minimum high period of clock is 4us */ + udelay(IXGBE_I2C_T_HIGH); + + ixgbe_lower_i2c_clk(hw, &i2cctl); + + /* Minimum low period of clock is 4.7 us. + * This also takes care of the data hold time. + */ + udelay(IXGBE_I2C_T_LOW); + } else { + status = IXGBE_ERR_I2C; + hw_dbg(hw, "I2C data was not set to %X\n", data); + } + + return status; +} +/** + * ixgbe_raise_i2c_clk - Raises the I2C SCL clock + * @hw: pointer to hardware structure + * @i2cctl: Current value of I2CCTL register + * + * Raises the I2C clock line '0'->'1' + **/ +static void ixgbe_raise_i2c_clk(struct ixgbe_hw *hw, u32 *i2cctl) +{ + u32 i = 0; + u32 timeout = IXGBE_I2C_CLOCK_STRETCHING_TIMEOUT; + u32 i2cctl_r = 0; + + for (i = 0; i < timeout; i++) { + *i2cctl |= IXGBE_I2C_CLK_OUT; + + IXGBE_WRITE_REG(hw, IXGBE_I2CCTL, *i2cctl); + IXGBE_WRITE_FLUSH(hw); + /* SCL rise time (1000ns) */ + udelay(IXGBE_I2C_T_RISE); + + i2cctl_r = IXGBE_READ_REG(hw, IXGBE_I2CCTL); + if (i2cctl_r & IXGBE_I2C_CLK_IN) + break; + } +} + +/** + * ixgbe_lower_i2c_clk - Lowers the I2C SCL clock + * @hw: pointer to hardware structure + * @i2cctl: Current value of I2CCTL register + * + * Lowers the I2C clock line '1'->'0' + **/ +static void ixgbe_lower_i2c_clk(struct ixgbe_hw *hw, u32 *i2cctl) +{ + + *i2cctl &= ~IXGBE_I2C_CLK_OUT; + + IXGBE_WRITE_REG(hw, IXGBE_I2CCTL, *i2cctl); + IXGBE_WRITE_FLUSH(hw); + + /* SCL fall time (300ns) */ + udelay(IXGBE_I2C_T_FALL); +} + +/** + * ixgbe_set_i2c_data - Sets the I2C data bit + * @hw: pointer to hardware structure + * @i2cctl: Current value of I2CCTL register + * @data: I2C data value (0 or 1) to set + * + * Sets the I2C data bit + **/ +static s32 ixgbe_set_i2c_data(struct ixgbe_hw *hw, u32 *i2cctl, bool data) +{ + s32 status = 0; + + if (data) + *i2cctl |= IXGBE_I2C_DATA_OUT; + else + *i2cctl &= ~IXGBE_I2C_DATA_OUT; + + IXGBE_WRITE_REG(hw, IXGBE_I2CCTL, *i2cctl); + IXGBE_WRITE_FLUSH(hw); + + /* Data rise/fall (1000ns/300ns) and set-up time (250ns) */ + udelay(IXGBE_I2C_T_RISE + IXGBE_I2C_T_FALL + IXGBE_I2C_T_SU_DATA); + + /* Verify data was set correctly */ + *i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL); + if (data != ixgbe_get_i2c_data(i2cctl)) { + status = IXGBE_ERR_I2C; + hw_dbg(hw, "Error - I2C data was not set to %X.\n", data); + } + + return status; +} + +/** + * ixgbe_get_i2c_data - Reads the I2C SDA data bit + * @hw: pointer to hardware structure + * @i2cctl: Current value of I2CCTL register + * + * Returns the I2C data bit value + **/ +static bool ixgbe_get_i2c_data(u32 *i2cctl) +{ + bool data; + + if (*i2cctl & IXGBE_I2C_DATA_IN) + data = 1; + else + data = 0; + + return data; +} + +/** + * ixgbe_i2c_bus_clear - Clears the I2C bus + * @hw: pointer to hardware structure + * + * Clears the I2C bus by sending nine clock pulses. + * Used when data line is stuck low. + **/ +void ixgbe_i2c_bus_clear(struct ixgbe_hw *hw) +{ + u32 i2cctl = IXGBE_READ_REG(hw, IXGBE_I2CCTL); + u32 i; + + ixgbe_i2c_start(hw); + + ixgbe_set_i2c_data(hw, &i2cctl, 1); + + for (i = 0; i < 9; i++) { + ixgbe_raise_i2c_clk(hw, &i2cctl); + + /* Min high period of clock is 4us */ + udelay(IXGBE_I2C_T_HIGH); + + ixgbe_lower_i2c_clk(hw, &i2cctl); + + /* Min low period of clock is 4.7us*/ + udelay(IXGBE_I2C_T_LOW); + } + + ixgbe_i2c_start(hw); + + /* Put the i2c bus back to default state */ + ixgbe_i2c_stop(hw); +} + +/** + * ixgbe_tn_check_overtemp - Checks if an overtemp occurred. + * @hw: pointer to hardware structure + * + * Checks if the LASI temp alarm status was triggered due to overtemp + **/ +s32 ixgbe_tn_check_overtemp(struct ixgbe_hw *hw) +{ + s32 status = 0; + u16 phy_data = 0; + + if (hw->device_id != IXGBE_DEV_ID_82599_T3_LOM) + goto out; + + /* Check that the LASI temp alarm status was triggered */ + hw->phy.ops.read_reg(hw, IXGBE_TN_LASI_STATUS_REG, + IXGBE_MDIO_PMA_PMD_DEV_TYPE, &phy_data); + + if (!(phy_data & IXGBE_TN_LASI_STATUS_TEMP_ALARM)) + goto out; + + status = IXGBE_ERR_OVERTEMP; +out: + return status; +} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h new file mode 100644 index 00000000..bbe5a9e3 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_phy.h @@ -0,0 +1,137 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _IXGBE_PHY_H_ +#define _IXGBE_PHY_H_ + +#include "ixgbe_type.h" +#define IXGBE_I2C_EEPROM_DEV_ADDR 0xA0 + +/* EEPROM byte offsets */ +#define IXGBE_SFF_IDENTIFIER 0x0 +#define IXGBE_SFF_IDENTIFIER_SFP 0x3 +#define IXGBE_SFF_VENDOR_OUI_BYTE0 0x25 +#define IXGBE_SFF_VENDOR_OUI_BYTE1 0x26 +#define IXGBE_SFF_VENDOR_OUI_BYTE2 0x27 +#define IXGBE_SFF_1GBE_COMP_CODES 0x6 +#define IXGBE_SFF_10GBE_COMP_CODES 0x3 +#define IXGBE_SFF_CABLE_TECHNOLOGY 0x8 +#define IXGBE_SFF_CABLE_SPEC_COMP 0x3C + +/* Bitmasks */ +#define IXGBE_SFF_DA_PASSIVE_CABLE 0x4 +#define IXGBE_SFF_DA_ACTIVE_CABLE 0x8 +#define IXGBE_SFF_DA_SPEC_ACTIVE_LIMITING 0x4 +#define IXGBE_SFF_1GBASESX_CAPABLE 0x1 +#define IXGBE_SFF_1GBASELX_CAPABLE 0x2 +#define IXGBE_SFF_1GBASET_CAPABLE 0x8 +#define IXGBE_SFF_10GBASESR_CAPABLE 0x10 +#define IXGBE_SFF_10GBASELR_CAPABLE 0x20 +#define IXGBE_I2C_EEPROM_READ_MASK 0x100 +#define IXGBE_I2C_EEPROM_STATUS_MASK 0x3 +#define IXGBE_I2C_EEPROM_STATUS_NO_OPERATION 0x0 +#define IXGBE_I2C_EEPROM_STATUS_PASS 0x1 +#define IXGBE_I2C_EEPROM_STATUS_FAIL 0x2 +#define IXGBE_I2C_EEPROM_STATUS_IN_PROGRESS 0x3 + +/* Flow control defines */ +#define IXGBE_TAF_SYM_PAUSE 0x400 +#define IXGBE_TAF_ASM_PAUSE 0x800 + +/* Bit-shift macros */ +#define IXGBE_SFF_VENDOR_OUI_BYTE0_SHIFT 24 +#define IXGBE_SFF_VENDOR_OUI_BYTE1_SHIFT 16 +#define IXGBE_SFF_VENDOR_OUI_BYTE2_SHIFT 8 + +/* Vendor OUIs: format of OUI is 0x[byte0][byte1][byte2][00] */ +#define IXGBE_SFF_VENDOR_OUI_TYCO 0x00407600 +#define IXGBE_SFF_VENDOR_OUI_FTL 0x00906500 +#define IXGBE_SFF_VENDOR_OUI_AVAGO 0x00176A00 +#define IXGBE_SFF_VENDOR_OUI_INTEL 0x001B2100 + +/* I2C SDA and SCL timing parameters for standard mode */ +#define IXGBE_I2C_T_HD_STA 4 +#define IXGBE_I2C_T_LOW 5 +#define IXGBE_I2C_T_HIGH 4 +#define IXGBE_I2C_T_SU_STA 5 +#define IXGBE_I2C_T_HD_DATA 5 +#define IXGBE_I2C_T_SU_DATA 1 +#define IXGBE_I2C_T_RISE 1 +#define IXGBE_I2C_T_FALL 1 +#define IXGBE_I2C_T_SU_STO 4 +#define IXGBE_I2C_T_BUF 5 + +#define IXGBE_TN_LASI_STATUS_REG 0x9005 +#define IXGBE_TN_LASI_STATUS_TEMP_ALARM 0x0008 + +s32 ixgbe_init_phy_ops_generic(struct ixgbe_hw *hw); +bool ixgbe_validate_phy_addr(struct ixgbe_hw *hw, u32 phy_addr); +enum ixgbe_phy_type ixgbe_get_phy_type_from_id(u32 phy_id); +s32 ixgbe_get_phy_id(struct ixgbe_hw *hw); +s32 ixgbe_identify_phy_generic(struct ixgbe_hw *hw); +s32 ixgbe_reset_phy_generic(struct ixgbe_hw *hw); +s32 ixgbe_read_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr, + u32 device_type, u16 *phy_data); +s32 ixgbe_write_phy_reg_generic(struct ixgbe_hw *hw, u32 reg_addr, + u32 device_type, u16 phy_data); +s32 ixgbe_setup_phy_link_generic(struct ixgbe_hw *hw); +s32 ixgbe_setup_phy_link_speed_generic(struct ixgbe_hw *hw, + ixgbe_link_speed speed, + bool autoneg, + bool autoneg_wait_to_complete); +s32 ixgbe_get_copper_link_capabilities_generic(struct ixgbe_hw *hw, + ixgbe_link_speed *speed, + bool *autoneg); + +/* PHY specific */ +s32 ixgbe_check_phy_link_tnx(struct ixgbe_hw *hw, + ixgbe_link_speed *speed, + bool *link_up); +s32 ixgbe_setup_phy_link_tnx(struct ixgbe_hw *hw); +s32 ixgbe_get_phy_firmware_version_tnx(struct ixgbe_hw *hw, + u16 *firmware_version); +s32 ixgbe_get_phy_firmware_version_generic(struct ixgbe_hw *hw, + u16 *firmware_version); + +s32 ixgbe_reset_phy_nl(struct ixgbe_hw *hw); +s32 ixgbe_identify_module_generic(struct ixgbe_hw *hw); +s32 ixgbe_identify_sfp_module_generic(struct ixgbe_hw *hw); +s32 ixgbe_identify_qsfp_module_generic(struct ixgbe_hw *hw); +s32 ixgbe_get_sfp_init_sequence_offsets(struct ixgbe_hw *hw, + u16 *list_offset, + u16 *data_offset); +s32 ixgbe_tn_check_overtemp(struct ixgbe_hw *hw); +s32 ixgbe_read_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset, + u8 dev_addr, u8 *data); +s32 ixgbe_write_i2c_byte_generic(struct ixgbe_hw *hw, u8 byte_offset, + u8 dev_addr, u8 data); +s32 ixgbe_read_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset, + u8 *eeprom_data); +s32 ixgbe_write_i2c_eeprom_generic(struct ixgbe_hw *hw, u8 byte_offset, + u8 eeprom_data); +void ixgbe_i2c_bus_clear(struct ixgbe_hw *hw); +#endif /* _IXGBE_PHY_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_sriov.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_sriov.h new file mode 100644 index 00000000..5e3559fd --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_sriov.h @@ -0,0 +1,73 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + + +#ifndef _IXGBE_SRIOV_H_ +#define _IXGBE_SRIOV_H_ + +int ixgbe_set_vf_multicasts(struct ixgbe_adapter *adapter, + int entries, u16 *hash_list, u32 vf); +void ixgbe_restore_vf_multicasts(struct ixgbe_adapter *adapter); +int ixgbe_set_vf_vlan(struct ixgbe_adapter *adapter, int add, int vid, u32 vf); +void ixgbe_set_vmolr(struct ixgbe_hw *hw, u32 vf, bool aupe); +void ixgbe_vf_reset_event(struct ixgbe_adapter *adapter, u32 vf); +void ixgbe_vf_reset_msg(struct ixgbe_adapter *adapter, u32 vf); +void ixgbe_msg_task(struct ixgbe_adapter *adapter); +int ixgbe_set_vf_mac(struct ixgbe_adapter *adapter, + int vf, unsigned char *mac_addr); +void ixgbe_disable_tx_rx(struct ixgbe_adapter *adapter); +void ixgbe_ping_all_vfs(struct ixgbe_adapter *adapter); +#ifdef IFLA_VF_MAX +int ixgbe_ndo_set_vf_mac(struct net_device *netdev, int queue, u8 *mac); +int ixgbe_ndo_set_vf_vlan(struct net_device *netdev, int queue, u16 vlan, + u8 qos); +int ixgbe_ndo_set_vf_bw(struct net_device *netdev, int vf, int tx_rate); +#ifdef HAVE_VF_SPOOFCHK_CONFIGURE +int ixgbe_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting); +#endif +int ixgbe_ndo_get_vf_config(struct net_device *netdev, + int vf, struct ifla_vf_info *ivi); +#endif +void ixgbe_disable_sriov(struct ixgbe_adapter *adapter); +#ifdef CONFIG_PCI_IOV +int ixgbe_vf_configuration(struct pci_dev *pdev, unsigned int event_mask); +void ixgbe_enable_sriov(struct ixgbe_adapter *adapter); +#endif +int ixgbe_check_vf_assignment(struct ixgbe_adapter *adapter); +#ifdef IFLA_VF_MAX +void ixgbe_check_vf_rate_limit(struct ixgbe_adapter *adapter); +#endif /* IFLA_VF_MAX */ +void ixgbe_dump_registers(struct ixgbe_adapter *adapter); + +/* + * These are defined in ixgbe_type.h on behalf of the VF driver + * but we need them here unwrapped for the PF driver. + */ +#define IXGBE_DEV_ID_82599_VF 0x10ED +#define IXGBE_DEV_ID_X540_VF 0x1515 + +#endif /* _IXGBE_SRIOV_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_type.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_type.h new file mode 100644 index 00000000..6b21c879 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_type.h @@ -0,0 +1,3254 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _IXGBE_TYPE_H_ +#define _IXGBE_TYPE_H_ + +#include "ixgbe_osdep.h" + + +/* Vendor ID */ +#define IXGBE_INTEL_VENDOR_ID 0x8086 + +/* Device IDs */ +#define IXGBE_DEV_ID_82598 0x10B6 +#define IXGBE_DEV_ID_82598_BX 0x1508 +#define IXGBE_DEV_ID_82598AF_DUAL_PORT 0x10C6 +#define IXGBE_DEV_ID_82598AF_SINGLE_PORT 0x10C7 +#define IXGBE_DEV_ID_82598AT 0x10C8 +#define IXGBE_DEV_ID_82598AT2 0x150B +#define IXGBE_DEV_ID_82598EB_SFP_LOM 0x10DB +#define IXGBE_DEV_ID_82598EB_CX4 0x10DD +#define IXGBE_DEV_ID_82598_CX4_DUAL_PORT 0x10EC +#define IXGBE_DEV_ID_82598_DA_DUAL_PORT 0x10F1 +#define IXGBE_DEV_ID_82598_SR_DUAL_PORT_EM 0x10E1 +#define IXGBE_DEV_ID_82598EB_XF_LR 0x10F4 +#define IXGBE_DEV_ID_82599_KX4 0x10F7 +#define IXGBE_DEV_ID_82599_KX4_MEZZ 0x1514 +#define IXGBE_DEV_ID_82599_KR 0x1517 +#define IXGBE_DEV_ID_82599_COMBO_BACKPLANE 0x10F8 +#define IXGBE_SUBDEV_ID_82599_KX4_KR_MEZZ 0x000C +#define IXGBE_DEV_ID_82599_CX4 0x10F9 +#define IXGBE_DEV_ID_82599_SFP 0x10FB +#define IXGBE_SUBDEV_ID_82599_SFP 0x11A9 +#define IXGBE_SUBDEV_ID_82599_560FLR 0x17D0 +#define IXGBE_DEV_ID_82599_BACKPLANE_FCOE 0x152A +#define IXGBE_DEV_ID_82599_SFP_FCOE 0x1529 +#define IXGBE_DEV_ID_82599_SFP_EM 0x1507 +#define IXGBE_DEV_ID_82599_SFP_SF2 0x154D +#define IXGBE_DEV_ID_82599_QSFP_SF_QP 0x1558 +#define IXGBE_DEV_ID_82599EN_SFP 0x1557 +#define IXGBE_DEV_ID_82599_XAUI_LOM 0x10FC +#define IXGBE_DEV_ID_82599_T3_LOM 0x151C +#define IXGBE_DEV_ID_82599_LS 0x154F +#define IXGBE_DEV_ID_X540T 0x1528 + +/* General Registers */ +#define IXGBE_CTRL 0x00000 +#define IXGBE_STATUS 0x00008 +#define IXGBE_CTRL_EXT 0x00018 +#define IXGBE_ESDP 0x00020 +#define IXGBE_EODSDP 0x00028 +#define IXGBE_I2CCTL 0x00028 +#define IXGBE_PHY_GPIO 0x00028 +#define IXGBE_MAC_GPIO 0x00030 +#define IXGBE_PHYINT_STATUS0 0x00100 +#define IXGBE_PHYINT_STATUS1 0x00104 +#define IXGBE_PHYINT_STATUS2 0x00108 +#define IXGBE_LEDCTL 0x00200 +#define IXGBE_FRTIMER 0x00048 +#define IXGBE_TCPTIMER 0x0004C +#define IXGBE_CORESPARE 0x00600 +#define IXGBE_EXVET 0x05078 + +/* NVM Registers */ +#define IXGBE_EEC 0x10010 +#define IXGBE_EERD 0x10014 +#define IXGBE_EEWR 0x10018 +#define IXGBE_FLA 0x1001C +#define IXGBE_EEMNGCTL 0x10110 +#define IXGBE_EEMNGDATA 0x10114 +#define IXGBE_FLMNGCTL 0x10118 +#define IXGBE_FLMNGDATA 0x1011C +#define IXGBE_FLMNGCNT 0x10120 +#define IXGBE_FLOP 0x1013C +#define IXGBE_GRC 0x10200 +#define IXGBE_SRAMREL 0x10210 +#define IXGBE_PHYDBG 0x10218 + +/* General Receive Control */ +#define IXGBE_GRC_MNG 0x00000001 /* Manageability Enable */ +#define IXGBE_GRC_APME 0x00000002 /* APM enabled in EEPROM */ + +#define IXGBE_VPDDIAG0 0x10204 +#define IXGBE_VPDDIAG1 0x10208 + +/* I2CCTL Bit Masks */ +#define IXGBE_I2C_CLK_IN 0x00000001 +#define IXGBE_I2C_CLK_OUT 0x00000002 +#define IXGBE_I2C_DATA_IN 0x00000004 +#define IXGBE_I2C_DATA_OUT 0x00000008 +#define IXGBE_I2C_CLOCK_STRETCHING_TIMEOUT 500 + +#define IXGBE_I2C_THERMAL_SENSOR_ADDR 0xF8 +#define IXGBE_EMC_INTERNAL_DATA 0x00 +#define IXGBE_EMC_INTERNAL_THERM_LIMIT 0x20 +#define IXGBE_EMC_DIODE1_DATA 0x01 +#define IXGBE_EMC_DIODE1_THERM_LIMIT 0x19 +#define IXGBE_EMC_DIODE2_DATA 0x23 +#define IXGBE_EMC_DIODE2_THERM_LIMIT 0x1A + +#define IXGBE_MAX_SENSORS 3 + +struct ixgbe_thermal_diode_data { + u8 location; + u8 temp; + u8 caution_thresh; + u8 max_op_thresh; +}; + +struct ixgbe_thermal_sensor_data { + struct ixgbe_thermal_diode_data sensor[IXGBE_MAX_SENSORS]; +}; + +/* Interrupt Registers */ +#define IXGBE_EICR 0x00800 +#define IXGBE_EICS 0x00808 +#define IXGBE_EIMS 0x00880 +#define IXGBE_EIMC 0x00888 +#define IXGBE_EIAC 0x00810 +#define IXGBE_EIAM 0x00890 +#define IXGBE_EICS_EX(_i) (0x00A90 + (_i) * 4) +#define IXGBE_EIMS_EX(_i) (0x00AA0 + (_i) * 4) +#define IXGBE_EIMC_EX(_i) (0x00AB0 + (_i) * 4) +#define IXGBE_EIAM_EX(_i) (0x00AD0 + (_i) * 4) +/* 82599 EITR is only 12 bits, with the lower 3 always zero */ +/* + * 82598 EITR is 16 bits but set the limits based on the max + * supported by all ixgbe hardware + */ +#define IXGBE_MAX_INT_RATE 488281 +#define IXGBE_MIN_INT_RATE 956 +#define IXGBE_MAX_EITR 0x00000FF8 +#define IXGBE_MIN_EITR 8 +#define IXGBE_EITR(_i) (((_i) <= 23) ? (0x00820 + ((_i) * 4)) : \ + (0x012300 + (((_i) - 24) * 4))) +#define IXGBE_EITR_ITR_INT_MASK 0x00000FF8 +#define IXGBE_EITR_LLI_MOD 0x00008000 +#define IXGBE_EITR_CNT_WDIS 0x80000000 +#define IXGBE_IVAR(_i) (0x00900 + ((_i) * 4)) /* 24 at 0x900-0x960 */ +#define IXGBE_IVAR_MISC 0x00A00 /* misc MSI-X interrupt causes */ +#define IXGBE_EITRSEL 0x00894 +#define IXGBE_MSIXT 0x00000 /* MSI-X Table. 0x0000 - 0x01C */ +#define IXGBE_MSIXPBA 0x02000 /* MSI-X Pending bit array */ +#define IXGBE_PBACL(_i) (((_i) == 0) ? (0x11068) : (0x110C0 + ((_i) * 4))) +#define IXGBE_GPIE 0x00898 + +/* Flow Control Registers */ +#define IXGBE_FCADBUL 0x03210 +#define IXGBE_FCADBUH 0x03214 +#define IXGBE_FCAMACL 0x04328 +#define IXGBE_FCAMACH 0x0432C +#define IXGBE_FCRTH_82599(_i) (0x03260 + ((_i) * 4)) /* 8 of these (0-7) */ +#define IXGBE_FCRTL_82599(_i) (0x03220 + ((_i) * 4)) /* 8 of these (0-7) */ +#define IXGBE_PFCTOP 0x03008 +#define IXGBE_FCTTV(_i) (0x03200 + ((_i) * 4)) /* 4 of these (0-3) */ +#define IXGBE_FCRTL(_i) (0x03220 + ((_i) * 8)) /* 8 of these (0-7) */ +#define IXGBE_FCRTH(_i) (0x03260 + ((_i) * 8)) /* 8 of these (0-7) */ +#define IXGBE_FCRTV 0x032A0 +#define IXGBE_FCCFG 0x03D00 +#define IXGBE_TFCS 0x0CE00 + +/* Receive DMA Registers */ +#define IXGBE_RDBAL(_i) (((_i) < 64) ? (0x01000 + ((_i) * 0x40)) : \ + (0x0D000 + (((_i) - 64) * 0x40))) +#define IXGBE_RDBAH(_i) (((_i) < 64) ? (0x01004 + ((_i) * 0x40)) : \ + (0x0D004 + (((_i) - 64) * 0x40))) +#define IXGBE_RDLEN(_i) (((_i) < 64) ? (0x01008 + ((_i) * 0x40)) : \ + (0x0D008 + (((_i) - 64) * 0x40))) +#define IXGBE_RDH(_i) (((_i) < 64) ? (0x01010 + ((_i) * 0x40)) : \ + (0x0D010 + (((_i) - 64) * 0x40))) +#define IXGBE_RDT(_i) (((_i) < 64) ? (0x01018 + ((_i) * 0x40)) : \ + (0x0D018 + (((_i) - 64) * 0x40))) +#define IXGBE_RXDCTL(_i) (((_i) < 64) ? (0x01028 + ((_i) * 0x40)) : \ + (0x0D028 + (((_i) - 64) * 0x40))) +#define IXGBE_RSCCTL(_i) (((_i) < 64) ? (0x0102C + ((_i) * 0x40)) : \ + (0x0D02C + (((_i) - 64) * 0x40))) +#define IXGBE_RSCDBU 0x03028 +#define IXGBE_RDDCC 0x02F20 +#define IXGBE_RXMEMWRAP 0x03190 +#define IXGBE_STARCTRL 0x03024 +/* + * Split and Replication Receive Control Registers + * 00-15 : 0x02100 + n*4 + * 16-64 : 0x01014 + n*0x40 + * 64-127: 0x0D014 + (n-64)*0x40 + */ +#define IXGBE_SRRCTL(_i) (((_i) <= 15) ? (0x02100 + ((_i) * 4)) : \ + (((_i) < 64) ? (0x01014 + ((_i) * 0x40)) : \ + (0x0D014 + (((_i) - 64) * 0x40)))) +/* + * Rx DCA Control Register: + * 00-15 : 0x02200 + n*4 + * 16-64 : 0x0100C + n*0x40 + * 64-127: 0x0D00C + (n-64)*0x40 + */ +#define IXGBE_DCA_RXCTRL(_i) (((_i) <= 15) ? (0x02200 + ((_i) * 4)) : \ + (((_i) < 64) ? (0x0100C + ((_i) * 0x40)) : \ + (0x0D00C + (((_i) - 64) * 0x40)))) +#define IXGBE_RDRXCTL 0x02F00 +#define IXGBE_RDRXCTL_RSC_PUSH 0x80 +/* 8 of these 0x03C00 - 0x03C1C */ +#define IXGBE_RXPBSIZE(_i) (0x03C00 + ((_i) * 4)) +#define IXGBE_RXCTRL 0x03000 +#define IXGBE_DROPEN 0x03D04 +#define IXGBE_RXPBSIZE_SHIFT 10 + +/* Receive Registers */ +#define IXGBE_RXCSUM 0x05000 +#define IXGBE_RFCTL 0x05008 +#define IXGBE_DRECCCTL 0x02F08 +#define IXGBE_DRECCCTL_DISABLE 0 +#define IXGBE_DRECCCTL2 0x02F8C + +/* Multicast Table Array - 128 entries */ +#define IXGBE_MTA(_i) (0x05200 + ((_i) * 4)) +#define IXGBE_RAL(_i) (((_i) <= 15) ? (0x05400 + ((_i) * 8)) : \ + (0x0A200 + ((_i) * 8))) +#define IXGBE_RAH(_i) (((_i) <= 15) ? (0x05404 + ((_i) * 8)) : \ + (0x0A204 + ((_i) * 8))) +#define IXGBE_MPSAR_LO(_i) (0x0A600 + ((_i) * 8)) +#define IXGBE_MPSAR_HI(_i) (0x0A604 + ((_i) * 8)) +/* Packet split receive type */ +#define IXGBE_PSRTYPE(_i) (((_i) <= 15) ? (0x05480 + ((_i) * 4)) : \ + (0x0EA00 + ((_i) * 4))) +/* array of 4096 1-bit vlan filters */ +#define IXGBE_VFTA(_i) (0x0A000 + ((_i) * 4)) +/*array of 4096 4-bit vlan vmdq indices */ +#define IXGBE_VFTAVIND(_j, _i) (0x0A200 + ((_j) * 0x200) + ((_i) * 4)) +#define IXGBE_FCTRL 0x05080 +#define IXGBE_VLNCTRL 0x05088 +#define IXGBE_MCSTCTRL 0x05090 +#define IXGBE_MRQC 0x05818 +#define IXGBE_SAQF(_i) (0x0E000 + ((_i) * 4)) /* Source Address Queue Filter */ +#define IXGBE_DAQF(_i) (0x0E200 + ((_i) * 4)) /* Dest. Address Queue Filter */ +#define IXGBE_SDPQF(_i) (0x0E400 + ((_i) * 4)) /* Src Dest. Addr Queue Filter */ +#define IXGBE_FTQF(_i) (0x0E600 + ((_i) * 4)) /* Five Tuple Queue Filter */ +#define IXGBE_ETQF(_i) (0x05128 + ((_i) * 4)) /* EType Queue Filter */ +#define IXGBE_ETQS(_i) (0x0EC00 + ((_i) * 4)) /* EType Queue Select */ +#define IXGBE_SYNQF 0x0EC30 /* SYN Packet Queue Filter */ +#define IXGBE_RQTC 0x0EC70 +#define IXGBE_MTQC 0x08120 +#define IXGBE_VLVF(_i) (0x0F100 + ((_i) * 4)) /* 64 of these (0-63) */ +#define IXGBE_VLVFB(_i) (0x0F200 + ((_i) * 4)) /* 128 of these (0-127) */ +#define IXGBE_VMVIR(_i) (0x08000 + ((_i) * 4)) /* 64 of these (0-63) */ +#define IXGBE_VT_CTL 0x051B0 +#define IXGBE_PFMAILBOX(_i) (0x04B00 + (4 * (_i))) /* 64 total */ +/* 64 Mailboxes, 16 DW each */ +#define IXGBE_PFMBMEM(_i) (0x13000 + (64 * (_i))) +#define IXGBE_PFMBICR(_i) (0x00710 + (4 * (_i))) /* 4 total */ +#define IXGBE_PFMBIMR(_i) (0x00720 + (4 * (_i))) /* 4 total */ +#define IXGBE_VFRE(_i) (0x051E0 + ((_i) * 4)) +#define IXGBE_VFTE(_i) (0x08110 + ((_i) * 4)) +#define IXGBE_VMECM(_i) (0x08790 + ((_i) * 4)) +#define IXGBE_QDE 0x2F04 +#define IXGBE_VMTXSW(_i) (0x05180 + ((_i) * 4)) /* 2 total */ +#define IXGBE_VMOLR(_i) (0x0F000 + ((_i) * 4)) /* 64 total */ +#define IXGBE_UTA(_i) (0x0F400 + ((_i) * 4)) +#define IXGBE_MRCTL(_i) (0x0F600 + ((_i) * 4)) +#define IXGBE_VMRVLAN(_i) (0x0F610 + ((_i) * 4)) +#define IXGBE_VMRVM(_i) (0x0F630 + ((_i) * 4)) +#define IXGBE_L34T_IMIR(_i) (0x0E800 + ((_i) * 4)) /*128 of these (0-127)*/ +#define IXGBE_RXFECCERR0 0x051B8 +#define IXGBE_LLITHRESH 0x0EC90 +#define IXGBE_IMIR(_i) (0x05A80 + ((_i) * 4)) /* 8 of these (0-7) */ +#define IXGBE_IMIREXT(_i) (0x05AA0 + ((_i) * 4)) /* 8 of these (0-7) */ +#define IXGBE_IMIRVP 0x05AC0 +#define IXGBE_VMD_CTL 0x0581C +#define IXGBE_RETA(_i) (0x05C00 + ((_i) * 4)) /* 32 of these (0-31) */ +#define IXGBE_RSSRK(_i) (0x05C80 + ((_i) * 4)) /* 10 of these (0-9) */ + +/* Flow Director registers */ +#define IXGBE_FDIRCTRL 0x0EE00 +#define IXGBE_FDIRHKEY 0x0EE68 +#define IXGBE_FDIRSKEY 0x0EE6C +#define IXGBE_FDIRDIP4M 0x0EE3C +#define IXGBE_FDIRSIP4M 0x0EE40 +#define IXGBE_FDIRTCPM 0x0EE44 +#define IXGBE_FDIRUDPM 0x0EE48 +#define IXGBE_FDIRIP6M 0x0EE74 +#define IXGBE_FDIRM 0x0EE70 + +/* Flow Director Stats registers */ +#define IXGBE_FDIRFREE 0x0EE38 +#define IXGBE_FDIRLEN 0x0EE4C +#define IXGBE_FDIRUSTAT 0x0EE50 +#define IXGBE_FDIRFSTAT 0x0EE54 +#define IXGBE_FDIRMATCH 0x0EE58 +#define IXGBE_FDIRMISS 0x0EE5C + +/* Flow Director Programming registers */ +#define IXGBE_FDIRSIPv6(_i) (0x0EE0C + ((_i) * 4)) /* 3 of these (0-2) */ +#define IXGBE_FDIRIPSA 0x0EE18 +#define IXGBE_FDIRIPDA 0x0EE1C +#define IXGBE_FDIRPORT 0x0EE20 +#define IXGBE_FDIRVLAN 0x0EE24 +#define IXGBE_FDIRHASH 0x0EE28 +#define IXGBE_FDIRCMD 0x0EE2C + +/* Transmit DMA registers */ +#define IXGBE_TDBAL(_i) (0x06000 + ((_i) * 0x40)) /* 32 of them (0-31)*/ +#define IXGBE_TDBAH(_i) (0x06004 + ((_i) * 0x40)) +#define IXGBE_TDLEN(_i) (0x06008 + ((_i) * 0x40)) +#define IXGBE_TDH(_i) (0x06010 + ((_i) * 0x40)) +#define IXGBE_TDT(_i) (0x06018 + ((_i) * 0x40)) +#define IXGBE_TXDCTL(_i) (0x06028 + ((_i) * 0x40)) +#define IXGBE_TDWBAL(_i) (0x06038 + ((_i) * 0x40)) +#define IXGBE_TDWBAH(_i) (0x0603C + ((_i) * 0x40)) +#define IXGBE_DTXCTL 0x07E00 + +#define IXGBE_DMATXCTL 0x04A80 +#define IXGBE_PFVFSPOOF(_i) (0x08200 + ((_i) * 4)) /* 8 of these 0 - 7 */ +#define IXGBE_PFDTXGSWC 0x08220 +#define IXGBE_DTXMXSZRQ 0x08100 +#define IXGBE_DTXTCPFLGL 0x04A88 +#define IXGBE_DTXTCPFLGH 0x04A8C +#define IXGBE_LBDRPEN 0x0CA00 +#define IXGBE_TXPBTHRESH(_i) (0x04950 + ((_i) * 4)) /* 8 of these 0 - 7 */ + +#define IXGBE_DMATXCTL_TE 0x1 /* Transmit Enable */ +#define IXGBE_DMATXCTL_NS 0x2 /* No Snoop LSO hdr buffer */ +#define IXGBE_DMATXCTL_GDV 0x8 /* Global Double VLAN */ +#define IXGBE_DMATXCTL_VT_SHIFT 16 /* VLAN EtherType */ + +#define IXGBE_PFDTXGSWC_VT_LBEN 0x1 /* Local L2 VT switch enable */ + +/* Anti-spoofing defines */ +#define IXGBE_SPOOF_MACAS_MASK 0xFF +#define IXGBE_SPOOF_VLANAS_MASK 0xFF00 +#define IXGBE_SPOOF_VLANAS_SHIFT 8 +#define IXGBE_PFVFSPOOF_REG_COUNT 8 +/* 16 of these (0-15) */ +#define IXGBE_DCA_TXCTRL(_i) (0x07200 + ((_i) * 4)) +/* Tx DCA Control register : 128 of these (0-127) */ +#define IXGBE_DCA_TXCTRL_82599(_i) (0x0600C + ((_i) * 0x40)) +#define IXGBE_TIPG 0x0CB00 +#define IXGBE_TXPBSIZE(_i) (0x0CC00 + ((_i) * 4)) /* 8 of these */ +#define IXGBE_MNGTXMAP 0x0CD10 +#define IXGBE_TIPG_FIBER_DEFAULT 3 +#define IXGBE_TXPBSIZE_SHIFT 10 + +/* Wake up registers */ +#define IXGBE_WUC 0x05800 +#define IXGBE_WUFC 0x05808 +#define IXGBE_WUS 0x05810 +#define IXGBE_IPAV 0x05838 +#define IXGBE_IP4AT 0x05840 /* IPv4 table 0x5840-0x5858 */ +#define IXGBE_IP6AT 0x05880 /* IPv6 table 0x5880-0x588F */ + +#define IXGBE_WUPL 0x05900 +#define IXGBE_WUPM 0x05A00 /* wake up pkt memory 0x5A00-0x5A7C */ +#define IXGBE_FHFT(_n) (0x09000 + (_n * 0x100)) /* Flex host filter table */ +/* Ext Flexible Host Filter Table */ +#define IXGBE_FHFT_EXT(_n) (0x09800 + (_n * 0x100)) + +#define IXGBE_FLEXIBLE_FILTER_COUNT_MAX 4 +#define IXGBE_EXT_FLEXIBLE_FILTER_COUNT_MAX 2 + +/* Each Flexible Filter is at most 128 (0x80) bytes in length */ +#define IXGBE_FLEXIBLE_FILTER_SIZE_MAX 128 +#define IXGBE_FHFT_LENGTH_OFFSET 0xFC /* Length byte in FHFT */ +#define IXGBE_FHFT_LENGTH_MASK 0x0FF /* Length in lower byte */ + +/* Definitions for power management and wakeup registers */ +/* Wake Up Control */ +#define IXGBE_WUC_PME_EN 0x00000002 /* PME Enable */ +#define IXGBE_WUC_PME_STATUS 0x00000004 /* PME Status */ +#define IXGBE_WUC_WKEN 0x00000010 /* Enable PE_WAKE_N pin assertion */ + +/* Wake Up Filter Control */ +#define IXGBE_WUFC_LNKC 0x00000001 /* Link Status Change Wakeup Enable */ +#define IXGBE_WUFC_MAG 0x00000002 /* Magic Packet Wakeup Enable */ +#define IXGBE_WUFC_EX 0x00000004 /* Directed Exact Wakeup Enable */ +#define IXGBE_WUFC_MC 0x00000008 /* Directed Multicast Wakeup Enable */ +#define IXGBE_WUFC_BC 0x00000010 /* Broadcast Wakeup Enable */ +#define IXGBE_WUFC_ARP 0x00000020 /* ARP Request Packet Wakeup Enable */ +#define IXGBE_WUFC_IPV4 0x00000040 /* Directed IPv4 Packet Wakeup Enable */ +#define IXGBE_WUFC_IPV6 0x00000080 /* Directed IPv6 Packet Wakeup Enable */ +#define IXGBE_WUFC_MNG 0x00000100 /* Directed Mgmt Packet Wakeup Enable */ + +#define IXGBE_WUFC_IGNORE_TCO 0x00008000 /* Ignore WakeOn TCO packets */ +#define IXGBE_WUFC_FLX0 0x00010000 /* Flexible Filter 0 Enable */ +#define IXGBE_WUFC_FLX1 0x00020000 /* Flexible Filter 1 Enable */ +#define IXGBE_WUFC_FLX2 0x00040000 /* Flexible Filter 2 Enable */ +#define IXGBE_WUFC_FLX3 0x00080000 /* Flexible Filter 3 Enable */ +#define IXGBE_WUFC_FLX4 0x00100000 /* Flexible Filter 4 Enable */ +#define IXGBE_WUFC_FLX5 0x00200000 /* Flexible Filter 5 Enable */ +#define IXGBE_WUFC_FLX_FILTERS 0x000F0000 /* Mask for 4 flex filters */ +/* Mask for Ext. flex filters */ +#define IXGBE_WUFC_EXT_FLX_FILTERS 0x00300000 +#define IXGBE_WUFC_ALL_FILTERS 0x003F00FF /* Mask for all wakeup filters */ +#define IXGBE_WUFC_FLX_OFFSET 16 /* Offset to the Flexible Filters bits */ + +/* Wake Up Status */ +#define IXGBE_WUS_LNKC IXGBE_WUFC_LNKC +#define IXGBE_WUS_MAG IXGBE_WUFC_MAG +#define IXGBE_WUS_EX IXGBE_WUFC_EX +#define IXGBE_WUS_MC IXGBE_WUFC_MC +#define IXGBE_WUS_BC IXGBE_WUFC_BC +#define IXGBE_WUS_ARP IXGBE_WUFC_ARP +#define IXGBE_WUS_IPV4 IXGBE_WUFC_IPV4 +#define IXGBE_WUS_IPV6 IXGBE_WUFC_IPV6 +#define IXGBE_WUS_MNG IXGBE_WUFC_MNG +#define IXGBE_WUS_FLX0 IXGBE_WUFC_FLX0 +#define IXGBE_WUS_FLX1 IXGBE_WUFC_FLX1 +#define IXGBE_WUS_FLX2 IXGBE_WUFC_FLX2 +#define IXGBE_WUS_FLX3 IXGBE_WUFC_FLX3 +#define IXGBE_WUS_FLX4 IXGBE_WUFC_FLX4 +#define IXGBE_WUS_FLX5 IXGBE_WUFC_FLX5 +#define IXGBE_WUS_FLX_FILTERS IXGBE_WUFC_FLX_FILTERS + +/* Wake Up Packet Length */ +#define IXGBE_WUPL_LENGTH_MASK 0xFFFF + +/* DCB registers */ +#define IXGBE_DCB_MAX_TRAFFIC_CLASS 8 +#define IXGBE_RMCS 0x03D00 +#define IXGBE_DPMCS 0x07F40 +#define IXGBE_PDPMCS 0x0CD00 +#define IXGBE_RUPPBMR 0x050A0 +#define IXGBE_RT2CR(_i) (0x03C20 + ((_i) * 4)) /* 8 of these (0-7) */ +#define IXGBE_RT2SR(_i) (0x03C40 + ((_i) * 4)) /* 8 of these (0-7) */ +#define IXGBE_TDTQ2TCCR(_i) (0x0602C + ((_i) * 0x40)) /* 8 of these (0-7) */ +#define IXGBE_TDTQ2TCSR(_i) (0x0622C + ((_i) * 0x40)) /* 8 of these (0-7) */ +#define IXGBE_TDPT2TCCR(_i) (0x0CD20 + ((_i) * 4)) /* 8 of these (0-7) */ +#define IXGBE_TDPT2TCSR(_i) (0x0CD40 + ((_i) * 4)) /* 8 of these (0-7) */ + + +/* Security Control Registers */ +#define IXGBE_SECTXCTRL 0x08800 +#define IXGBE_SECTXSTAT 0x08804 +#define IXGBE_SECTXBUFFAF 0x08808 +#define IXGBE_SECTXMINIFG 0x08810 +#define IXGBE_SECRXCTRL 0x08D00 +#define IXGBE_SECRXSTAT 0x08D04 + +/* Security Bit Fields and Masks */ +#define IXGBE_SECTXCTRL_SECTX_DIS 0x00000001 +#define IXGBE_SECTXCTRL_TX_DIS 0x00000002 +#define IXGBE_SECTXCTRL_STORE_FORWARD 0x00000004 + +#define IXGBE_SECTXSTAT_SECTX_RDY 0x00000001 +#define IXGBE_SECTXSTAT_ECC_TXERR 0x00000002 + +#define IXGBE_SECRXCTRL_SECRX_DIS 0x00000001 +#define IXGBE_SECRXCTRL_RX_DIS 0x00000002 + +#define IXGBE_SECRXSTAT_SECRX_RDY 0x00000001 +#define IXGBE_SECRXSTAT_ECC_RXERR 0x00000002 + +/* LinkSec (MacSec) Registers */ +#define IXGBE_LSECTXCAP 0x08A00 +#define IXGBE_LSECRXCAP 0x08F00 +#define IXGBE_LSECTXCTRL 0x08A04 +#define IXGBE_LSECTXSCL 0x08A08 /* SCI Low */ +#define IXGBE_LSECTXSCH 0x08A0C /* SCI High */ +#define IXGBE_LSECTXSA 0x08A10 +#define IXGBE_LSECTXPN0 0x08A14 +#define IXGBE_LSECTXPN1 0x08A18 +#define IXGBE_LSECTXKEY0(_n) (0x08A1C + (4 * (_n))) /* 4 of these (0-3) */ +#define IXGBE_LSECTXKEY1(_n) (0x08A2C + (4 * (_n))) /* 4 of these (0-3) */ +#define IXGBE_LSECRXCTRL 0x08F04 +#define IXGBE_LSECRXSCL 0x08F08 +#define IXGBE_LSECRXSCH 0x08F0C +#define IXGBE_LSECRXSA(_i) (0x08F10 + (4 * (_i))) /* 2 of these (0-1) */ +#define IXGBE_LSECRXPN(_i) (0x08F18 + (4 * (_i))) /* 2 of these (0-1) */ +#define IXGBE_LSECRXKEY(_n, _m) (0x08F20 + ((0x10 * (_n)) + (4 * (_m)))) +#define IXGBE_LSECTXUT 0x08A3C /* OutPktsUntagged */ +#define IXGBE_LSECTXPKTE 0x08A40 /* OutPktsEncrypted */ +#define IXGBE_LSECTXPKTP 0x08A44 /* OutPktsProtected */ +#define IXGBE_LSECTXOCTE 0x08A48 /* OutOctetsEncrypted */ +#define IXGBE_LSECTXOCTP 0x08A4C /* OutOctetsProtected */ +#define IXGBE_LSECRXUT 0x08F40 /* InPktsUntagged/InPktsNoTag */ +#define IXGBE_LSECRXOCTD 0x08F44 /* InOctetsDecrypted */ +#define IXGBE_LSECRXOCTV 0x08F48 /* InOctetsValidated */ +#define IXGBE_LSECRXBAD 0x08F4C /* InPktsBadTag */ +#define IXGBE_LSECRXNOSCI 0x08F50 /* InPktsNoSci */ +#define IXGBE_LSECRXUNSCI 0x08F54 /* InPktsUnknownSci */ +#define IXGBE_LSECRXUNCH 0x08F58 /* InPktsUnchecked */ +#define IXGBE_LSECRXDELAY 0x08F5C /* InPktsDelayed */ +#define IXGBE_LSECRXLATE 0x08F60 /* InPktsLate */ +#define IXGBE_LSECRXOK(_n) (0x08F64 + (0x04 * (_n))) /* InPktsOk */ +#define IXGBE_LSECRXINV(_n) (0x08F6C + (0x04 * (_n))) /* InPktsInvalid */ +#define IXGBE_LSECRXNV(_n) (0x08F74 + (0x04 * (_n))) /* InPktsNotValid */ +#define IXGBE_LSECRXUNSA 0x08F7C /* InPktsUnusedSa */ +#define IXGBE_LSECRXNUSA 0x08F80 /* InPktsNotUsingSa */ + +/* LinkSec (MacSec) Bit Fields and Masks */ +#define IXGBE_LSECTXCAP_SUM_MASK 0x00FF0000 +#define IXGBE_LSECTXCAP_SUM_SHIFT 16 +#define IXGBE_LSECRXCAP_SUM_MASK 0x00FF0000 +#define IXGBE_LSECRXCAP_SUM_SHIFT 16 + +#define IXGBE_LSECTXCTRL_EN_MASK 0x00000003 +#define IXGBE_LSECTXCTRL_DISABLE 0x0 +#define IXGBE_LSECTXCTRL_AUTH 0x1 +#define IXGBE_LSECTXCTRL_AUTH_ENCRYPT 0x2 +#define IXGBE_LSECTXCTRL_AISCI 0x00000020 +#define IXGBE_LSECTXCTRL_PNTHRSH_MASK 0xFFFFFF00 +#define IXGBE_LSECTXCTRL_RSV_MASK 0x000000D8 + +#define IXGBE_LSECRXCTRL_EN_MASK 0x0000000C +#define IXGBE_LSECRXCTRL_EN_SHIFT 2 +#define IXGBE_LSECRXCTRL_DISABLE 0x0 +#define IXGBE_LSECRXCTRL_CHECK 0x1 +#define IXGBE_LSECRXCTRL_STRICT 0x2 +#define IXGBE_LSECRXCTRL_DROP 0x3 +#define IXGBE_LSECRXCTRL_PLSH 0x00000040 +#define IXGBE_LSECRXCTRL_RP 0x00000080 +#define IXGBE_LSECRXCTRL_RSV_MASK 0xFFFFFF33 + +/* IpSec Registers */ +#define IXGBE_IPSTXIDX 0x08900 +#define IXGBE_IPSTXSALT 0x08904 +#define IXGBE_IPSTXKEY(_i) (0x08908 + (4 * (_i))) /* 4 of these (0-3) */ +#define IXGBE_IPSRXIDX 0x08E00 +#define IXGBE_IPSRXIPADDR(_i) (0x08E04 + (4 * (_i))) /* 4 of these (0-3) */ +#define IXGBE_IPSRXSPI 0x08E14 +#define IXGBE_IPSRXIPIDX 0x08E18 +#define IXGBE_IPSRXKEY(_i) (0x08E1C + (4 * (_i))) /* 4 of these (0-3) */ +#define IXGBE_IPSRXSALT 0x08E2C +#define IXGBE_IPSRXMOD 0x08E30 + +#define IXGBE_SECTXCTRL_STORE_FORWARD_ENABLE 0x4 + +/* DCB registers */ +#define IXGBE_RTRPCS 0x02430 +#define IXGBE_RTTDCS 0x04900 +#define IXGBE_RTTDCS_ARBDIS 0x00000040 /* DCB arbiter disable */ +#define IXGBE_RTTPCS 0x0CD00 +#define IXGBE_RTRUP2TC 0x03020 +#define IXGBE_RTTUP2TC 0x0C800 +#define IXGBE_RTRPT4C(_i) (0x02140 + ((_i) * 4)) /* 8 of these (0-7) */ +#define IXGBE_TXLLQ(_i) (0x082E0 + ((_i) * 4)) /* 4 of these (0-3) */ +#define IXGBE_RTRPT4S(_i) (0x02160 + ((_i) * 4)) /* 8 of these (0-7) */ +#define IXGBE_RTTDT2C(_i) (0x04910 + ((_i) * 4)) /* 8 of these (0-7) */ +#define IXGBE_RTTDT2S(_i) (0x04930 + ((_i) * 4)) /* 8 of these (0-7) */ +#define IXGBE_RTTPT2C(_i) (0x0CD20 + ((_i) * 4)) /* 8 of these (0-7) */ +#define IXGBE_RTTPT2S(_i) (0x0CD40 + ((_i) * 4)) /* 8 of these (0-7) */ +#define IXGBE_RTTDQSEL 0x04904 +#define IXGBE_RTTDT1C 0x04908 +#define IXGBE_RTTDT1S 0x0490C +#define IXGBE_RTTDTECC 0x04990 +#define IXGBE_RTTDTECC_NO_BCN 0x00000100 + +#define IXGBE_RTTBCNRC 0x04984 +#define IXGBE_RTTBCNRC_RS_ENA 0x80000000 +#define IXGBE_RTTBCNRC_RF_DEC_MASK 0x00003FFF +#define IXGBE_RTTBCNRC_RF_INT_SHIFT 14 +#define IXGBE_RTTBCNRC_RF_INT_MASK \ + (IXGBE_RTTBCNRC_RF_DEC_MASK << IXGBE_RTTBCNRC_RF_INT_SHIFT) +#define IXGBE_RTTBCNRM 0x04980 + +/* FCoE DMA Context Registers */ +#define IXGBE_FCPTRL 0x02410 /* FC User Desc. PTR Low */ +#define IXGBE_FCPTRH 0x02414 /* FC USer Desc. PTR High */ +#define IXGBE_FCBUFF 0x02418 /* FC Buffer Control */ +#define IXGBE_FCDMARW 0x02420 /* FC Receive DMA RW */ +#define IXGBE_FCINVST0 0x03FC0 /* FC Invalid DMA Context Status Reg 0*/ +#define IXGBE_FCINVST(_i) (IXGBE_FCINVST0 + ((_i) * 4)) +#define IXGBE_FCBUFF_VALID (1 << 0) /* DMA Context Valid */ +#define IXGBE_FCBUFF_BUFFSIZE (3 << 3) /* User Buffer Size */ +#define IXGBE_FCBUFF_WRCONTX (1 << 7) /* 0: Initiator, 1: Target */ +#define IXGBE_FCBUFF_BUFFCNT 0x0000ff00 /* Number of User Buffers */ +#define IXGBE_FCBUFF_OFFSET 0xffff0000 /* User Buffer Offset */ +#define IXGBE_FCBUFF_BUFFSIZE_SHIFT 3 +#define IXGBE_FCBUFF_BUFFCNT_SHIFT 8 +#define IXGBE_FCBUFF_OFFSET_SHIFT 16 +#define IXGBE_FCDMARW_WE (1 << 14) /* Write enable */ +#define IXGBE_FCDMARW_RE (1 << 15) /* Read enable */ +#define IXGBE_FCDMARW_FCOESEL 0x000001ff /* FC X_ID: 11 bits */ +#define IXGBE_FCDMARW_LASTSIZE 0xffff0000 /* Last User Buffer Size */ +#define IXGBE_FCDMARW_LASTSIZE_SHIFT 16 +/* FCoE SOF/EOF */ +#define IXGBE_TEOFF 0x04A94 /* Tx FC EOF */ +#define IXGBE_TSOFF 0x04A98 /* Tx FC SOF */ +#define IXGBE_REOFF 0x05158 /* Rx FC EOF */ +#define IXGBE_RSOFF 0x051F8 /* Rx FC SOF */ +/* FCoE Filter Context Registers */ +#define IXGBE_FCFLT 0x05108 /* FC FLT Context */ +#define IXGBE_FCFLTRW 0x05110 /* FC Filter RW Control */ +#define IXGBE_FCPARAM 0x051d8 /* FC Offset Parameter */ +#define IXGBE_FCFLT_VALID (1 << 0) /* Filter Context Valid */ +#define IXGBE_FCFLT_FIRST (1 << 1) /* Filter First */ +#define IXGBE_FCFLT_SEQID 0x00ff0000 /* Sequence ID */ +#define IXGBE_FCFLT_SEQCNT 0xff000000 /* Sequence Count */ +#define IXGBE_FCFLTRW_RVALDT (1 << 13) /* Fast Re-Validation */ +#define IXGBE_FCFLTRW_WE (1 << 14) /* Write Enable */ +#define IXGBE_FCFLTRW_RE (1 << 15) /* Read Enable */ +/* FCoE Receive Control */ +#define IXGBE_FCRXCTRL 0x05100 /* FC Receive Control */ +#define IXGBE_FCRXCTRL_FCOELLI (1 << 0) /* Low latency interrupt */ +#define IXGBE_FCRXCTRL_SAVBAD (1 << 1) /* Save Bad Frames */ +#define IXGBE_FCRXCTRL_FRSTRDH (1 << 2) /* EN 1st Read Header */ +#define IXGBE_FCRXCTRL_LASTSEQH (1 << 3) /* EN Last Header in Seq */ +#define IXGBE_FCRXCTRL_ALLH (1 << 4) /* EN All Headers */ +#define IXGBE_FCRXCTRL_FRSTSEQH (1 << 5) /* EN 1st Seq. Header */ +#define IXGBE_FCRXCTRL_ICRC (1 << 6) /* Ignore Bad FC CRC */ +#define IXGBE_FCRXCTRL_FCCRCBO (1 << 7) /* FC CRC Byte Ordering */ +#define IXGBE_FCRXCTRL_FCOEVER 0x00000f00 /* FCoE Version: 4 bits */ +#define IXGBE_FCRXCTRL_FCOEVER_SHIFT 8 +/* FCoE Redirection */ +#define IXGBE_FCRECTL 0x0ED00 /* FC Redirection Control */ +#define IXGBE_FCRETA0 0x0ED10 /* FC Redirection Table 0 */ +#define IXGBE_FCRETA(_i) (IXGBE_FCRETA0 + ((_i) * 4)) /* FCoE Redir */ +#define IXGBE_FCRECTL_ENA 0x1 /* FCoE Redir Table Enable */ +#define IXGBE_FCRETASEL_ENA 0x2 /* FCoE FCRETASEL bit */ +#define IXGBE_FCRETA_SIZE 8 /* Max entries in FCRETA */ +#define IXGBE_FCRETA_ENTRY_MASK 0x0000007f /* 7 bits for the queue index */ + +/* Stats registers */ +#define IXGBE_CRCERRS 0x04000 +#define IXGBE_ILLERRC 0x04004 +#define IXGBE_ERRBC 0x04008 +#define IXGBE_MSPDC 0x04010 +#define IXGBE_MPC(_i) (0x03FA0 + ((_i) * 4)) /* 8 of these 3FA0-3FBC*/ +#define IXGBE_MLFC 0x04034 +#define IXGBE_MRFC 0x04038 +#define IXGBE_RLEC 0x04040 +#define IXGBE_LXONTXC 0x03F60 +#define IXGBE_LXONRXC 0x0CF60 +#define IXGBE_LXOFFTXC 0x03F68 +#define IXGBE_LXOFFRXC 0x0CF68 +#define IXGBE_LXONRXCNT 0x041A4 +#define IXGBE_LXOFFRXCNT 0x041A8 +#define IXGBE_PXONRXCNT(_i) (0x04140 + ((_i) * 4)) /* 8 of these */ +#define IXGBE_PXOFFRXCNT(_i) (0x04160 + ((_i) * 4)) /* 8 of these */ +#define IXGBE_PXON2OFFCNT(_i) (0x03240 + ((_i) * 4)) /* 8 of these */ +#define IXGBE_PXONTXC(_i) (0x03F00 + ((_i) * 4)) /* 8 of these 3F00-3F1C*/ +#define IXGBE_PXONRXC(_i) (0x0CF00 + ((_i) * 4)) /* 8 of these CF00-CF1C*/ +#define IXGBE_PXOFFTXC(_i) (0x03F20 + ((_i) * 4)) /* 8 of these 3F20-3F3C*/ +#define IXGBE_PXOFFRXC(_i) (0x0CF20 + ((_i) * 4)) /* 8 of these CF20-CF3C*/ +#define IXGBE_PRC64 0x0405C +#define IXGBE_PRC127 0x04060 +#define IXGBE_PRC255 0x04064 +#define IXGBE_PRC511 0x04068 +#define IXGBE_PRC1023 0x0406C +#define IXGBE_PRC1522 0x04070 +#define IXGBE_GPRC 0x04074 +#define IXGBE_BPRC 0x04078 +#define IXGBE_MPRC 0x0407C +#define IXGBE_GPTC 0x04080 +#define IXGBE_GORCL 0x04088 +#define IXGBE_GORCH 0x0408C +#define IXGBE_GOTCL 0x04090 +#define IXGBE_GOTCH 0x04094 +#define IXGBE_RNBC(_i) (0x03FC0 + ((_i) * 4)) /* 8 of these 3FC0-3FDC*/ +#define IXGBE_RUC 0x040A4 +#define IXGBE_RFC 0x040A8 +#define IXGBE_ROC 0x040AC +#define IXGBE_RJC 0x040B0 +#define IXGBE_MNGPRC 0x040B4 +#define IXGBE_MNGPDC 0x040B8 +#define IXGBE_MNGPTC 0x0CF90 +#define IXGBE_TORL 0x040C0 +#define IXGBE_TORH 0x040C4 +#define IXGBE_TPR 0x040D0 +#define IXGBE_TPT 0x040D4 +#define IXGBE_PTC64 0x040D8 +#define IXGBE_PTC127 0x040DC +#define IXGBE_PTC255 0x040E0 +#define IXGBE_PTC511 0x040E4 +#define IXGBE_PTC1023 0x040E8 +#define IXGBE_PTC1522 0x040EC +#define IXGBE_MPTC 0x040F0 +#define IXGBE_BPTC 0x040F4 +#define IXGBE_XEC 0x04120 +#define IXGBE_SSVPC 0x08780 + +#define IXGBE_RQSMR(_i) (0x02300 + ((_i) * 4)) +#define IXGBE_TQSMR(_i) (((_i) <= 7) ? (0x07300 + ((_i) * 4)) : \ + (0x08600 + ((_i) * 4))) +#define IXGBE_TQSM(_i) (0x08600 + ((_i) * 4)) + +#define IXGBE_QPRC(_i) (0x01030 + ((_i) * 0x40)) /* 16 of these */ +#define IXGBE_QPTC(_i) (0x06030 + ((_i) * 0x40)) /* 16 of these */ +#define IXGBE_QBRC(_i) (0x01034 + ((_i) * 0x40)) /* 16 of these */ +#define IXGBE_QBTC(_i) (0x06034 + ((_i) * 0x40)) /* 16 of these */ +#define IXGBE_QBRC_L(_i) (0x01034 + ((_i) * 0x40)) /* 16 of these */ +#define IXGBE_QBRC_H(_i) (0x01038 + ((_i) * 0x40)) /* 16 of these */ +#define IXGBE_QPRDC(_i) (0x01430 + ((_i) * 0x40)) /* 16 of these */ +#define IXGBE_QBTC_L(_i) (0x08700 + ((_i) * 0x8)) /* 16 of these */ +#define IXGBE_QBTC_H(_i) (0x08704 + ((_i) * 0x8)) /* 16 of these */ +#define IXGBE_FCCRC 0x05118 /* Num of Good Eth CRC w/ Bad FC CRC */ +#define IXGBE_FCOERPDC 0x0241C /* FCoE Rx Packets Dropped Count */ +#define IXGBE_FCLAST 0x02424 /* FCoE Last Error Count */ +#define IXGBE_FCOEPRC 0x02428 /* Number of FCoE Packets Received */ +#define IXGBE_FCOEDWRC 0x0242C /* Number of FCoE DWords Received */ +#define IXGBE_FCOEPTC 0x08784 /* Number of FCoE Packets Transmitted */ +#define IXGBE_FCOEDWTC 0x08788 /* Number of FCoE DWords Transmitted */ +#define IXGBE_FCCRC_CNT_MASK 0x0000FFFF /* CRC_CNT: bit 0 - 15 */ +#define IXGBE_FCLAST_CNT_MASK 0x0000FFFF /* Last_CNT: bit 0 - 15 */ +#define IXGBE_O2BGPTC 0x041C4 +#define IXGBE_O2BSPC 0x087B0 +#define IXGBE_B2OSPC 0x041C0 +#define IXGBE_B2OGPRC 0x02F90 +#define IXGBE_BUPRC 0x04180 +#define IXGBE_BMPRC 0x04184 +#define IXGBE_BBPRC 0x04188 +#define IXGBE_BUPTC 0x0418C +#define IXGBE_BMPTC 0x04190 +#define IXGBE_BBPTC 0x04194 +#define IXGBE_BCRCERRS 0x04198 +#define IXGBE_BXONRXC 0x0419C +#define IXGBE_BXOFFRXC 0x041E0 +#define IXGBE_BXONTXC 0x041E4 +#define IXGBE_BXOFFTXC 0x041E8 +#define IXGBE_PCRC8ECL 0x0E810 +#define IXGBE_PCRC8ECH 0x0E811 +#define IXGBE_PCRC8ECH_MASK 0x1F +#define IXGBE_LDPCECL 0x0E820 +#define IXGBE_LDPCECH 0x0E821 + +/* Management */ +#define IXGBE_MAVTV(_i) (0x05010 + ((_i) * 4)) /* 8 of these (0-7) */ +#define IXGBE_MFUTP(_i) (0x05030 + ((_i) * 4)) /* 8 of these (0-7) */ +#define IXGBE_MANC 0x05820 +#define IXGBE_MFVAL 0x05824 +#define IXGBE_MANC2H 0x05860 +#define IXGBE_MDEF(_i) (0x05890 + ((_i) * 4)) /* 8 of these (0-7) */ +#define IXGBE_MIPAF 0x058B0 +#define IXGBE_MMAL(_i) (0x05910 + ((_i) * 8)) /* 4 of these (0-3) */ +#define IXGBE_MMAH(_i) (0x05914 + ((_i) * 8)) /* 4 of these (0-3) */ +#define IXGBE_FTFT 0x09400 /* 0x9400-0x97FC */ +#define IXGBE_METF(_i) (0x05190 + ((_i) * 4)) /* 4 of these (0-3) */ +#define IXGBE_MDEF_EXT(_i) (0x05160 + ((_i) * 4)) /* 8 of these (0-7) */ +#define IXGBE_LSWFW 0x15014 +#define IXGBE_BMCIP(_i) (0x05050 + ((_i) * 4)) /* 0x5050-0x505C */ +#define IXGBE_BMCIPVAL 0x05060 +#define IXGBE_BMCIP_IPADDR_TYPE 0x00000001 +#define IXGBE_BMCIP_IPADDR_VALID 0x00000002 + +/* Management Bit Fields and Masks */ +#define IXGBE_MANC_EN_BMC2OS 0x10000000 /* Ena BMC2OS and OS2BMC traffic */ +#define IXGBE_MANC_EN_BMC2OS_SHIFT 28 + +/* Firmware Semaphore Register */ +#define IXGBE_FWSM_MODE_MASK 0xE + +/* ARC Subsystem registers */ +#define IXGBE_HICR 0x15F00 +#define IXGBE_FWSTS 0x15F0C +#define IXGBE_HSMC0R 0x15F04 +#define IXGBE_HSMC1R 0x15F08 +#define IXGBE_SWSR 0x15F10 +#define IXGBE_HFDR 0x15FE8 +#define IXGBE_FLEX_MNG 0x15800 /* 0x15800 - 0x15EFC */ + +#define IXGBE_HICR_EN 0x01 /* Enable bit - RO */ +/* Driver sets this bit when done to put command in RAM */ +#define IXGBE_HICR_C 0x02 +#define IXGBE_HICR_SV 0x04 /* Status Validity */ +#define IXGBE_HICR_FW_RESET_ENABLE 0x40 +#define IXGBE_HICR_FW_RESET 0x80 + +/* PCI-E registers */ +#define IXGBE_GCR 0x11000 +#define IXGBE_GTV 0x11004 +#define IXGBE_FUNCTAG 0x11008 +#define IXGBE_GLT 0x1100C +#define IXGBE_PCIEPIPEADR 0x11004 +#define IXGBE_PCIEPIPEDAT 0x11008 +#define IXGBE_GSCL_1 0x11010 +#define IXGBE_GSCL_2 0x11014 +#define IXGBE_GSCL_3 0x11018 +#define IXGBE_GSCL_4 0x1101C +#define IXGBE_GSCN_0 0x11020 +#define IXGBE_GSCN_1 0x11024 +#define IXGBE_GSCN_2 0x11028 +#define IXGBE_GSCN_3 0x1102C +#define IXGBE_FACTPS 0x10150 +#define IXGBE_PCIEANACTL 0x11040 +#define IXGBE_SWSM 0x10140 +#define IXGBE_FWSM 0x10148 +#define IXGBE_GSSR 0x10160 +#define IXGBE_MREVID 0x11064 +#define IXGBE_DCA_ID 0x11070 +#define IXGBE_DCA_CTRL 0x11074 +#define IXGBE_SWFW_SYNC IXGBE_GSSR + +/* PCI-E registers 82599-Specific */ +#define IXGBE_GCR_EXT 0x11050 +#define IXGBE_GSCL_5_82599 0x11030 +#define IXGBE_GSCL_6_82599 0x11034 +#define IXGBE_GSCL_7_82599 0x11038 +#define IXGBE_GSCL_8_82599 0x1103C +#define IXGBE_PHYADR_82599 0x11040 +#define IXGBE_PHYDAT_82599 0x11044 +#define IXGBE_PHYCTL_82599 0x11048 +#define IXGBE_PBACLR_82599 0x11068 +#define IXGBE_CIAA_82599 0x11088 +#define IXGBE_CIAD_82599 0x1108C +#define IXGBE_PICAUSE 0x110B0 +#define IXGBE_PIENA 0x110B8 +#define IXGBE_CDQ_MBR_82599 0x110B4 +#define IXGBE_PCIESPARE 0x110BC +#define IXGBE_MISC_REG_82599 0x110F0 +#define IXGBE_ECC_CTRL_0_82599 0x11100 +#define IXGBE_ECC_CTRL_1_82599 0x11104 +#define IXGBE_ECC_STATUS_82599 0x110E0 +#define IXGBE_BAR_CTRL_82599 0x110F4 + +/* PCI Express Control */ +#define IXGBE_GCR_CMPL_TMOUT_MASK 0x0000F000 +#define IXGBE_GCR_CMPL_TMOUT_10ms 0x00001000 +#define IXGBE_GCR_CMPL_TMOUT_RESEND 0x00010000 +#define IXGBE_GCR_CAP_VER2 0x00040000 + +#define IXGBE_GCR_EXT_MSIX_EN 0x80000000 +#define IXGBE_GCR_EXT_BUFFERS_CLEAR 0x40000000 +#define IXGBE_GCR_EXT_VT_MODE_16 0x00000001 +#define IXGBE_GCR_EXT_VT_MODE_32 0x00000002 +#define IXGBE_GCR_EXT_VT_MODE_64 0x00000003 +#define IXGBE_GCR_EXT_SRIOV (IXGBE_GCR_EXT_MSIX_EN | \ + IXGBE_GCR_EXT_VT_MODE_64) +/* Time Sync Registers */ +#define IXGBE_TSYNCRXCTL 0x05188 /* Rx Time Sync Control register - RW */ +#define IXGBE_TSYNCTXCTL 0x08C00 /* Tx Time Sync Control register - RW */ +#define IXGBE_RXSTMPL 0x051E8 /* Rx timestamp Low - RO */ +#define IXGBE_RXSTMPH 0x051A4 /* Rx timestamp High - RO */ +#define IXGBE_RXSATRL 0x051A0 /* Rx timestamp attribute low - RO */ +#define IXGBE_RXSATRH 0x051A8 /* Rx timestamp attribute high - RO */ +#define IXGBE_RXMTRL 0x05120 /* RX message type register low - RW */ +#define IXGBE_TXSTMPL 0x08C04 /* Tx timestamp value Low - RO */ +#define IXGBE_TXSTMPH 0x08C08 /* Tx timestamp value High - RO */ +#define IXGBE_SYSTIML 0x08C0C /* System time register Low - RO */ +#define IXGBE_SYSTIMH 0x08C10 /* System time register High - RO */ +#define IXGBE_TIMINCA 0x08C14 /* Increment attributes register - RW */ +#define IXGBE_TIMADJL 0x08C18 /* Time Adjustment Offset register Low - RW */ +#define IXGBE_TIMADJH 0x08C1C /* Time Adjustment Offset register High - RW */ +#define IXGBE_TSAUXC 0x08C20 /* TimeSync Auxiliary Control register - RW */ +#define IXGBE_TRGTTIML0 0x08C24 /* Target Time Register 0 Low - RW */ +#define IXGBE_TRGTTIMH0 0x08C28 /* Target Time Register 0 High - RW */ +#define IXGBE_TRGTTIML1 0x08C2C /* Target Time Register 1 Low - RW */ +#define IXGBE_TRGTTIMH1 0x08C30 /* Target Time Register 1 High - RW */ +#define IXGBE_FREQOUT0 0x08C34 /* Frequency Out 0 Control register - RW */ +#define IXGBE_FREQOUT1 0x08C38 /* Frequency Out 1 Control register - RW */ +#define IXGBE_AUXSTMPL0 0x08C3C /* Auxiliary Time Stamp 0 register Low - RO */ +#define IXGBE_AUXSTMPH0 0x08C40 /* Auxiliary Time Stamp 0 register High - RO */ +#define IXGBE_AUXSTMPL1 0x08C44 /* Auxiliary Time Stamp 1 register Low - RO */ +#define IXGBE_AUXSTMPH1 0x08C48 /* Auxiliary Time Stamp 1 register High - RO */ + +/* Diagnostic Registers */ +#define IXGBE_RDSTATCTL 0x02C20 +#define IXGBE_RDSTAT(_i) (0x02C00 + ((_i) * 4)) /* 0x02C00-0x02C1C */ +#define IXGBE_RDHMPN 0x02F08 +#define IXGBE_RIC_DW(_i) (0x02F10 + ((_i) * 4)) +#define IXGBE_RDPROBE 0x02F20 +#define IXGBE_RDMAM 0x02F30 +#define IXGBE_RDMAD 0x02F34 +#define IXGBE_TDSTATCTL 0x07C20 +#define IXGBE_TDSTAT(_i) (0x07C00 + ((_i) * 4)) /* 0x07C00 - 0x07C1C */ +#define IXGBE_TDHMPN 0x07F08 +#define IXGBE_TDHMPN2 0x082FC +#define IXGBE_TXDESCIC 0x082CC +#define IXGBE_TIC_DW(_i) (0x07F10 + ((_i) * 4)) +#define IXGBE_TIC_DW2(_i) (0x082B0 + ((_i) * 4)) +#define IXGBE_TDPROBE 0x07F20 +#define IXGBE_TXBUFCTRL 0x0C600 +#define IXGBE_TXBUFDATA0 0x0C610 +#define IXGBE_TXBUFDATA1 0x0C614 +#define IXGBE_TXBUFDATA2 0x0C618 +#define IXGBE_TXBUFDATA3 0x0C61C +#define IXGBE_RXBUFCTRL 0x03600 +#define IXGBE_RXBUFDATA0 0x03610 +#define IXGBE_RXBUFDATA1 0x03614 +#define IXGBE_RXBUFDATA2 0x03618 +#define IXGBE_RXBUFDATA3 0x0361C +#define IXGBE_PCIE_DIAG(_i) (0x11090 + ((_i) * 4)) /* 8 of these */ +#define IXGBE_RFVAL 0x050A4 +#define IXGBE_MDFTC1 0x042B8 +#define IXGBE_MDFTC2 0x042C0 +#define IXGBE_MDFTFIFO1 0x042C4 +#define IXGBE_MDFTFIFO2 0x042C8 +#define IXGBE_MDFTS 0x042CC +#define IXGBE_RXDATAWRPTR(_i) (0x03700 + ((_i) * 4)) /* 8 of these 3700-370C*/ +#define IXGBE_RXDESCWRPTR(_i) (0x03710 + ((_i) * 4)) /* 8 of these 3710-371C*/ +#define IXGBE_RXDATARDPTR(_i) (0x03720 + ((_i) * 4)) /* 8 of these 3720-372C*/ +#define IXGBE_RXDESCRDPTR(_i) (0x03730 + ((_i) * 4)) /* 8 of these 3730-373C*/ +#define IXGBE_TXDATAWRPTR(_i) (0x0C700 + ((_i) * 4)) /* 8 of these C700-C70C*/ +#define IXGBE_TXDESCWRPTR(_i) (0x0C710 + ((_i) * 4)) /* 8 of these C710-C71C*/ +#define IXGBE_TXDATARDPTR(_i) (0x0C720 + ((_i) * 4)) /* 8 of these C720-C72C*/ +#define IXGBE_TXDESCRDPTR(_i) (0x0C730 + ((_i) * 4)) /* 8 of these C730-C73C*/ +#define IXGBE_PCIEECCCTL 0x1106C +#define IXGBE_RXWRPTR(_i) (0x03100 + ((_i) * 4)) /* 8 of these 3100-310C*/ +#define IXGBE_RXUSED(_i) (0x03120 + ((_i) * 4)) /* 8 of these 3120-312C*/ +#define IXGBE_RXRDPTR(_i) (0x03140 + ((_i) * 4)) /* 8 of these 3140-314C*/ +#define IXGBE_RXRDWRPTR(_i) (0x03160 + ((_i) * 4)) /* 8 of these 3160-310C*/ +#define IXGBE_TXWRPTR(_i) (0x0C100 + ((_i) * 4)) /* 8 of these C100-C10C*/ +#define IXGBE_TXUSED(_i) (0x0C120 + ((_i) * 4)) /* 8 of these C120-C12C*/ +#define IXGBE_TXRDPTR(_i) (0x0C140 + ((_i) * 4)) /* 8 of these C140-C14C*/ +#define IXGBE_TXRDWRPTR(_i) (0x0C160 + ((_i) * 4)) /* 8 of these C160-C10C*/ +#define IXGBE_PCIEECCCTL0 0x11100 +#define IXGBE_PCIEECCCTL1 0x11104 +#define IXGBE_RXDBUECC 0x03F70 +#define IXGBE_TXDBUECC 0x0CF70 +#define IXGBE_RXDBUEST 0x03F74 +#define IXGBE_TXDBUEST 0x0CF74 +#define IXGBE_PBTXECC 0x0C300 +#define IXGBE_PBRXECC 0x03300 +#define IXGBE_GHECCR 0x110B0 + +/* MAC Registers */ +#define IXGBE_PCS1GCFIG 0x04200 +#define IXGBE_PCS1GLCTL 0x04208 +#define IXGBE_PCS1GLSTA 0x0420C +#define IXGBE_PCS1GDBG0 0x04210 +#define IXGBE_PCS1GDBG1 0x04214 +#define IXGBE_PCS1GANA 0x04218 +#define IXGBE_PCS1GANLP 0x0421C +#define IXGBE_PCS1GANNP 0x04220 +#define IXGBE_PCS1GANLPNP 0x04224 +#define IXGBE_HLREG0 0x04240 +#define IXGBE_HLREG1 0x04244 +#define IXGBE_PAP 0x04248 +#define IXGBE_MACA 0x0424C +#define IXGBE_APAE 0x04250 +#define IXGBE_ARD 0x04254 +#define IXGBE_AIS 0x04258 +#define IXGBE_MSCA 0x0425C +#define IXGBE_MSRWD 0x04260 +#define IXGBE_MLADD 0x04264 +#define IXGBE_MHADD 0x04268 +#define IXGBE_MAXFRS 0x04268 +#define IXGBE_TREG 0x0426C +#define IXGBE_PCSS1 0x04288 +#define IXGBE_PCSS2 0x0428C +#define IXGBE_XPCSS 0x04290 +#define IXGBE_MFLCN 0x04294 +#define IXGBE_SERDESC 0x04298 +#define IXGBE_MACS 0x0429C +#define IXGBE_AUTOC 0x042A0 +#define IXGBE_LINKS 0x042A4 +#define IXGBE_LINKS2 0x04324 +#define IXGBE_AUTOC2 0x042A8 +#define IXGBE_AUTOC3 0x042AC +#define IXGBE_ANLP1 0x042B0 +#define IXGBE_ANLP2 0x042B4 +#define IXGBE_MACC 0x04330 +#define IXGBE_ATLASCTL 0x04800 +#define IXGBE_MMNGC 0x042D0 +#define IXGBE_ANLPNP1 0x042D4 +#define IXGBE_ANLPNP2 0x042D8 +#define IXGBE_KRPCSFC 0x042E0 +#define IXGBE_KRPCSS 0x042E4 +#define IXGBE_FECS1 0x042E8 +#define IXGBE_FECS2 0x042EC +#define IXGBE_SMADARCTL 0x14F10 +#define IXGBE_MPVC 0x04318 +#define IXGBE_SGMIIC 0x04314 + +/* Statistics Registers */ +#define IXGBE_RXNFGPC 0x041B0 +#define IXGBE_RXNFGBCL 0x041B4 +#define IXGBE_RXNFGBCH 0x041B8 +#define IXGBE_RXDGPC 0x02F50 +#define IXGBE_RXDGBCL 0x02F54 +#define IXGBE_RXDGBCH 0x02F58 +#define IXGBE_RXDDGPC 0x02F5C +#define IXGBE_RXDDGBCL 0x02F60 +#define IXGBE_RXDDGBCH 0x02F64 +#define IXGBE_RXLPBKGPC 0x02F68 +#define IXGBE_RXLPBKGBCL 0x02F6C +#define IXGBE_RXLPBKGBCH 0x02F70 +#define IXGBE_RXDLPBKGPC 0x02F74 +#define IXGBE_RXDLPBKGBCL 0x02F78 +#define IXGBE_RXDLPBKGBCH 0x02F7C +#define IXGBE_TXDGPC 0x087A0 +#define IXGBE_TXDGBCL 0x087A4 +#define IXGBE_TXDGBCH 0x087A8 + +#define IXGBE_RXDSTATCTRL 0x02F40 + +/* Copper Pond 2 link timeout */ +#define IXGBE_VALIDATE_LINK_READY_TIMEOUT 50 + +/* Omer CORECTL */ +#define IXGBE_CORECTL 0x014F00 +/* BARCTRL */ +#define IXGBE_BARCTRL 0x110F4 +#define IXGBE_BARCTRL_FLSIZE 0x0700 +#define IXGBE_BARCTRL_FLSIZE_SHIFT 8 +#define IXGBE_BARCTRL_CSRSIZE 0x2000 + +/* RSCCTL Bit Masks */ +#define IXGBE_RSCCTL_RSCEN 0x01 +#define IXGBE_RSCCTL_MAXDESC_1 0x00 +#define IXGBE_RSCCTL_MAXDESC_4 0x04 +#define IXGBE_RSCCTL_MAXDESC_8 0x08 +#define IXGBE_RSCCTL_MAXDESC_16 0x0C + +/* RSCDBU Bit Masks */ +#define IXGBE_RSCDBU_RSCSMALDIS_MASK 0x0000007F +#define IXGBE_RSCDBU_RSCACKDIS 0x00000080 + +/* RDRXCTL Bit Masks */ +#define IXGBE_RDRXCTL_RDMTS_1_2 0x00000000 /* Rx Desc Min THLD Size */ +#define IXGBE_RDRXCTL_CRCSTRIP 0x00000002 /* CRC Strip */ +#define IXGBE_RDRXCTL_MVMEN 0x00000020 +#define IXGBE_RDRXCTL_DMAIDONE 0x00000008 /* DMA init cycle done */ +#define IXGBE_RDRXCTL_AGGDIS 0x00010000 /* Aggregation disable */ +#define IXGBE_RDRXCTL_RSCFRSTSIZE 0x003E0000 /* RSC First packet size */ +#define IXGBE_RDRXCTL_RSCLLIDIS 0x00800000 /* Disabl RSC compl on LLI */ +#define IXGBE_RDRXCTL_RSCACKC 0x02000000 /* must set 1 when RSC ena */ +#define IXGBE_RDRXCTL_FCOE_WRFIX 0x04000000 /* must set 1 when RSC ena */ + +/* RQTC Bit Masks and Shifts */ +#define IXGBE_RQTC_SHIFT_TC(_i) ((_i) * 4) +#define IXGBE_RQTC_TC0_MASK (0x7 << 0) +#define IXGBE_RQTC_TC1_MASK (0x7 << 4) +#define IXGBE_RQTC_TC2_MASK (0x7 << 8) +#define IXGBE_RQTC_TC3_MASK (0x7 << 12) +#define IXGBE_RQTC_TC4_MASK (0x7 << 16) +#define IXGBE_RQTC_TC5_MASK (0x7 << 20) +#define IXGBE_RQTC_TC6_MASK (0x7 << 24) +#define IXGBE_RQTC_TC7_MASK (0x7 << 28) + +/* PSRTYPE.RQPL Bit masks and shift */ +#define IXGBE_PSRTYPE_RQPL_MASK 0x7 +#define IXGBE_PSRTYPE_RQPL_SHIFT 29 + +/* CTRL Bit Masks */ +#define IXGBE_CTRL_GIO_DIS 0x00000004 /* Global IO Master Disable bit */ +#define IXGBE_CTRL_LNK_RST 0x00000008 /* Link Reset. Resets everything. */ +#define IXGBE_CTRL_RST 0x04000000 /* Reset (SW) */ +#define IXGBE_CTRL_RST_MASK (IXGBE_CTRL_LNK_RST | IXGBE_CTRL_RST) + +/* FACTPS */ +#define IXGBE_FACTPS_LFS 0x40000000 /* LAN Function Select */ + +/* MHADD Bit Masks */ +#define IXGBE_MHADD_MFS_MASK 0xFFFF0000 +#define IXGBE_MHADD_MFS_SHIFT 16 + +/* Extended Device Control */ +#define IXGBE_CTRL_EXT_PFRSTD 0x00004000 /* Physical Function Reset Done */ +#define IXGBE_CTRL_EXT_NS_DIS 0x00010000 /* No Snoop disable */ +#define IXGBE_CTRL_EXT_RO_DIS 0x00020000 /* Relaxed Ordering disable */ +#define IXGBE_CTRL_EXT_DRV_LOAD 0x10000000 /* Driver loaded bit for FW */ + +/* Direct Cache Access (DCA) definitions */ +#define IXGBE_DCA_CTRL_DCA_ENABLE 0x00000000 /* DCA Enable */ +#define IXGBE_DCA_CTRL_DCA_DISABLE 0x00000001 /* DCA Disable */ + +#define IXGBE_DCA_CTRL_DCA_MODE_CB1 0x00 /* DCA Mode CB1 */ +#define IXGBE_DCA_CTRL_DCA_MODE_CB2 0x02 /* DCA Mode CB2 */ + +#define IXGBE_DCA_RXCTRL_CPUID_MASK 0x0000001F /* Rx CPUID Mask */ +#define IXGBE_DCA_RXCTRL_CPUID_MASK_82599 0xFF000000 /* Rx CPUID Mask */ +#define IXGBE_DCA_RXCTRL_CPUID_SHIFT_82599 24 /* Rx CPUID Shift */ +#define IXGBE_DCA_RXCTRL_DESC_DCA_EN (1 << 5) /* Rx Desc enable */ +#define IXGBE_DCA_RXCTRL_HEAD_DCA_EN (1 << 6) /* Rx Desc header ena */ +#define IXGBE_DCA_RXCTRL_DATA_DCA_EN (1 << 7) /* Rx Desc payload ena */ +#define IXGBE_DCA_RXCTRL_DESC_RRO_EN (1 << 9) /* Rx rd Desc Relax Order */ +#define IXGBE_DCA_RXCTRL_DATA_WRO_EN (1 << 13) /* Rx wr data Relax Order */ +#define IXGBE_DCA_RXCTRL_HEAD_WRO_EN (1 << 15) /* Rx wr header RO */ + +#define IXGBE_DCA_TXCTRL_CPUID_MASK 0x0000001F /* Tx CPUID Mask */ +#define IXGBE_DCA_TXCTRL_CPUID_MASK_82599 0xFF000000 /* Tx CPUID Mask */ +#define IXGBE_DCA_TXCTRL_CPUID_SHIFT_82599 24 /* Tx CPUID Shift */ +#define IXGBE_DCA_TXCTRL_DESC_DCA_EN (1 << 5) /* DCA Tx Desc enable */ +#define IXGBE_DCA_TXCTRL_DESC_RRO_EN (1 << 9) /* Tx rd Desc Relax Order */ +#define IXGBE_DCA_TXCTRL_DESC_WRO_EN (1 << 11) /* Tx Desc writeback RO bit */ +#define IXGBE_DCA_TXCTRL_DATA_RRO_EN (1 << 13) /* Tx rd data Relax Order */ +#define IXGBE_DCA_MAX_QUEUES_82598 16 /* DCA regs only on 16 queues */ + +/* MSCA Bit Masks */ +#define IXGBE_MSCA_NP_ADDR_MASK 0x0000FFFF /* MDI Addr (new prot) */ +#define IXGBE_MSCA_NP_ADDR_SHIFT 0 +#define IXGBE_MSCA_DEV_TYPE_MASK 0x001F0000 /* Dev Type (new prot) */ +#define IXGBE_MSCA_DEV_TYPE_SHIFT 16 /* Register Address (old prot */ +#define IXGBE_MSCA_PHY_ADDR_MASK 0x03E00000 /* PHY Address mask */ +#define IXGBE_MSCA_PHY_ADDR_SHIFT 21 /* PHY Address shift*/ +#define IXGBE_MSCA_OP_CODE_MASK 0x0C000000 /* OP CODE mask */ +#define IXGBE_MSCA_OP_CODE_SHIFT 26 /* OP CODE shift */ +#define IXGBE_MSCA_ADDR_CYCLE 0x00000000 /* OP CODE 00 (addr cycle) */ +#define IXGBE_MSCA_WRITE 0x04000000 /* OP CODE 01 (wr) */ +#define IXGBE_MSCA_READ 0x0C000000 /* OP CODE 11 (rd) */ +#define IXGBE_MSCA_READ_AUTOINC 0x08000000 /* OP CODE 10 (rd auto inc)*/ +#define IXGBE_MSCA_ST_CODE_MASK 0x30000000 /* ST Code mask */ +#define IXGBE_MSCA_ST_CODE_SHIFT 28 /* ST Code shift */ +#define IXGBE_MSCA_NEW_PROTOCOL 0x00000000 /* ST CODE 00 (new prot) */ +#define IXGBE_MSCA_OLD_PROTOCOL 0x10000000 /* ST CODE 01 (old prot) */ +#define IXGBE_MSCA_MDI_COMMAND 0x40000000 /* Initiate MDI command */ +#define IXGBE_MSCA_MDI_IN_PROG_EN 0x80000000 /* MDI in progress ena */ + +/* MSRWD bit masks */ +#define IXGBE_MSRWD_WRITE_DATA_MASK 0x0000FFFF +#define IXGBE_MSRWD_WRITE_DATA_SHIFT 0 +#define IXGBE_MSRWD_READ_DATA_MASK 0xFFFF0000 +#define IXGBE_MSRWD_READ_DATA_SHIFT 16 + +/* Atlas registers */ +#define IXGBE_ATLAS_PDN_LPBK 0x24 +#define IXGBE_ATLAS_PDN_10G 0xB +#define IXGBE_ATLAS_PDN_1G 0xC +#define IXGBE_ATLAS_PDN_AN 0xD + +/* Atlas bit masks */ +#define IXGBE_ATLASCTL_WRITE_CMD 0x00010000 +#define IXGBE_ATLAS_PDN_TX_REG_EN 0x10 +#define IXGBE_ATLAS_PDN_TX_10G_QL_ALL 0xF0 +#define IXGBE_ATLAS_PDN_TX_1G_QL_ALL 0xF0 +#define IXGBE_ATLAS_PDN_TX_AN_QL_ALL 0xF0 + +/* Omer bit masks */ +#define IXGBE_CORECTL_WRITE_CMD 0x00010000 + +/* Device Type definitions for new protocol MDIO commands */ +#define IXGBE_MDIO_PMA_PMD_DEV_TYPE 0x1 +#define IXGBE_MDIO_PCS_DEV_TYPE 0x3 +#define IXGBE_MDIO_PHY_XS_DEV_TYPE 0x4 +#define IXGBE_MDIO_AUTO_NEG_DEV_TYPE 0x7 +#define IXGBE_MDIO_VENDOR_SPECIFIC_1_DEV_TYPE 0x1E /* Device 30 */ +#define IXGBE_TWINAX_DEV 1 + +#define IXGBE_MDIO_COMMAND_TIMEOUT 100 /* PHY Timeout for 1 GB mode */ + +#define IXGBE_MDIO_VENDOR_SPECIFIC_1_CONTROL 0x0 /* VS1 Ctrl Reg */ +#define IXGBE_MDIO_VENDOR_SPECIFIC_1_STATUS 0x1 /* VS1 Status Reg */ +#define IXGBE_MDIO_VENDOR_SPECIFIC_1_LINK_STATUS 0x0008 /* 1 = Link Up */ +#define IXGBE_MDIO_VENDOR_SPECIFIC_1_SPEED_STATUS 0x0010 /* 0-10G, 1-1G */ +#define IXGBE_MDIO_VENDOR_SPECIFIC_1_10G_SPEED 0x0018 +#define IXGBE_MDIO_VENDOR_SPECIFIC_1_1G_SPEED 0x0010 + +#define IXGBE_MDIO_AUTO_NEG_CONTROL 0x0 /* AUTO_NEG Control Reg */ +#define IXGBE_MDIO_AUTO_NEG_STATUS 0x1 /* AUTO_NEG Status Reg */ +#define IXGBE_MDIO_AUTO_NEG_ADVT 0x10 /* AUTO_NEG Advt Reg */ +#define IXGBE_MDIO_AUTO_NEG_LP 0x13 /* AUTO_NEG LP Status Reg */ +#define IXGBE_MDIO_PHY_XS_CONTROL 0x0 /* PHY_XS Control Reg */ +#define IXGBE_MDIO_PHY_XS_RESET 0x8000 /* PHY_XS Reset */ +#define IXGBE_MDIO_PHY_ID_HIGH 0x2 /* PHY ID High Reg*/ +#define IXGBE_MDIO_PHY_ID_LOW 0x3 /* PHY ID Low Reg*/ +#define IXGBE_MDIO_PHY_SPEED_ABILITY 0x4 /* Speed Ability Reg */ +#define IXGBE_MDIO_PHY_SPEED_10G 0x0001 /* 10G capable */ +#define IXGBE_MDIO_PHY_SPEED_1G 0x0010 /* 1G capable */ +#define IXGBE_MDIO_PHY_SPEED_100M 0x0020 /* 100M capable */ +#define IXGBE_MDIO_PHY_EXT_ABILITY 0xB /* Ext Ability Reg */ +#define IXGBE_MDIO_PHY_10GBASET_ABILITY 0x0004 /* 10GBaseT capable */ +#define IXGBE_MDIO_PHY_1000BASET_ABILITY 0x0020 /* 1000BaseT capable */ +#define IXGBE_MDIO_PHY_100BASETX_ABILITY 0x0080 /* 100BaseTX capable */ +#define IXGBE_MDIO_PHY_SET_LOW_POWER_MODE 0x0800 /* Set low power mode */ + +#define IXGBE_MDIO_PMA_PMD_CONTROL_ADDR 0x0000 /* PMA/PMD Control Reg */ +#define IXGBE_MDIO_PMA_PMD_SDA_SCL_ADDR 0xC30A /* PHY_XS SDA/SCL Addr Reg */ +#define IXGBE_MDIO_PMA_PMD_SDA_SCL_DATA 0xC30B /* PHY_XS SDA/SCL Data Reg */ +#define IXGBE_MDIO_PMA_PMD_SDA_SCL_STAT 0xC30C /* PHY_XS SDA/SCL Status Reg */ + +/* MII clause 22/28 definitions */ +#define IXGBE_MDIO_PHY_LOW_POWER_MODE 0x0800 + +#define IXGBE_MII_10GBASE_T_AUTONEG_CTRL_REG 0x20 /* 10G Control Reg */ +#define IXGBE_MII_AUTONEG_VENDOR_PROVISION_1_REG 0xC400 /* 1G Provisioning 1 */ +#define IXGBE_MII_AUTONEG_XNP_TX_REG 0x17 /* 1G XNP Transmit */ +#define IXGBE_MII_AUTONEG_ADVERTISE_REG 0x10 /* 100M Advertisement */ +#define IXGBE_MII_10GBASE_T_ADVERTISE 0x1000 /* full duplex, bit:12*/ +#define IXGBE_MII_1GBASE_T_ADVERTISE_XNP_TX 0x4000 /* full duplex, bit:14*/ +#define IXGBE_MII_1GBASE_T_ADVERTISE 0x8000 /* full duplex, bit:15*/ +#define IXGBE_MII_100BASE_T_ADVERTISE 0x0100 /* full duplex, bit:8 */ +#define IXGBE_MII_100BASE_T_ADVERTISE_HALF 0x0080 /* half duplex, bit:7 */ +#define IXGBE_MII_RESTART 0x200 +#define IXGBE_MII_AUTONEG_COMPLETE 0x20 +#define IXGBE_MII_AUTONEG_LINK_UP 0x04 +#define IXGBE_MII_AUTONEG_REG 0x0 + +#define IXGBE_PHY_REVISION_MASK 0xFFFFFFF0 +#define IXGBE_MAX_PHY_ADDR 32 + +/* PHY IDs*/ +#define TN1010_PHY_ID 0x00A19410 +#define TNX_FW_REV 0xB +#define X540_PHY_ID 0x01540200 +#define AQ_FW_REV 0x20 +#define QT2022_PHY_ID 0x0043A400 +#define ATH_PHY_ID 0x03429050 + +/* PHY Types */ +#define IXGBE_M88E1145_E_PHY_ID 0x01410CD0 + +/* Special PHY Init Routine */ +#define IXGBE_PHY_INIT_OFFSET_NL 0x002B +#define IXGBE_PHY_INIT_END_NL 0xFFFF +#define IXGBE_CONTROL_MASK_NL 0xF000 +#define IXGBE_DATA_MASK_NL 0x0FFF +#define IXGBE_CONTROL_SHIFT_NL 12 +#define IXGBE_DELAY_NL 0 +#define IXGBE_DATA_NL 1 +#define IXGBE_CONTROL_NL 0x000F +#define IXGBE_CONTROL_EOL_NL 0x0FFF +#define IXGBE_CONTROL_SOL_NL 0x0000 + +/* General purpose Interrupt Enable */ +#define IXGBE_SDP0_GPIEN 0x00000001 /* SDP0 */ +#define IXGBE_SDP1_GPIEN 0x00000002 /* SDP1 */ +#define IXGBE_SDP2_GPIEN 0x00000004 /* SDP2 */ +#define IXGBE_GPIE_MSIX_MODE 0x00000010 /* MSI-X mode */ +#define IXGBE_GPIE_OCD 0x00000020 /* Other Clear Disable */ +#define IXGBE_GPIE_EIMEN 0x00000040 /* Immediate Interrupt Enable */ +#define IXGBE_GPIE_EIAME 0x40000000 +#define IXGBE_GPIE_PBA_SUPPORT 0x80000000 +#define IXGBE_GPIE_RSC_DELAY_SHIFT 11 +#define IXGBE_GPIE_VTMODE_MASK 0x0000C000 /* VT Mode Mask */ +#define IXGBE_GPIE_VTMODE_16 0x00004000 /* 16 VFs 8 queues per VF */ +#define IXGBE_GPIE_VTMODE_32 0x00008000 /* 32 VFs 4 queues per VF */ +#define IXGBE_GPIE_VTMODE_64 0x0000C000 /* 64 VFs 2 queues per VF */ + +/* Packet Buffer Initialization */ +#define IXGBE_MAX_PACKET_BUFFERS 8 + +#define IXGBE_TXPBSIZE_20KB 0x00005000 /* 20KB Packet Buffer */ +#define IXGBE_TXPBSIZE_40KB 0x0000A000 /* 40KB Packet Buffer */ +#define IXGBE_RXPBSIZE_48KB 0x0000C000 /* 48KB Packet Buffer */ +#define IXGBE_RXPBSIZE_64KB 0x00010000 /* 64KB Packet Buffer */ +#define IXGBE_RXPBSIZE_80KB 0x00014000 /* 80KB Packet Buffer */ +#define IXGBE_RXPBSIZE_128KB 0x00020000 /* 128KB Packet Buffer */ +#define IXGBE_RXPBSIZE_MAX 0x00080000 /* 512KB Packet Buffer */ +#define IXGBE_TXPBSIZE_MAX 0x00028000 /* 160KB Packet Buffer */ + +#define IXGBE_TXPKT_SIZE_MAX 0xA /* Max Tx Packet size */ +#define IXGBE_MAX_PB 8 + +/* Packet buffer allocation strategies */ +enum { + PBA_STRATEGY_EQUAL = 0, /* Distribute PB space equally */ +#define PBA_STRATEGY_EQUAL PBA_STRATEGY_EQUAL + PBA_STRATEGY_WEIGHTED = 1, /* Weight front half of TCs */ +#define PBA_STRATEGY_WEIGHTED PBA_STRATEGY_WEIGHTED +}; + +/* Transmit Flow Control status */ +#define IXGBE_TFCS_TXOFF 0x00000001 +#define IXGBE_TFCS_TXOFF0 0x00000100 +#define IXGBE_TFCS_TXOFF1 0x00000200 +#define IXGBE_TFCS_TXOFF2 0x00000400 +#define IXGBE_TFCS_TXOFF3 0x00000800 +#define IXGBE_TFCS_TXOFF4 0x00001000 +#define IXGBE_TFCS_TXOFF5 0x00002000 +#define IXGBE_TFCS_TXOFF6 0x00004000 +#define IXGBE_TFCS_TXOFF7 0x00008000 + +/* TCP Timer */ +#define IXGBE_TCPTIMER_KS 0x00000100 +#define IXGBE_TCPTIMER_COUNT_ENABLE 0x00000200 +#define IXGBE_TCPTIMER_COUNT_FINISH 0x00000400 +#define IXGBE_TCPTIMER_LOOP 0x00000800 +#define IXGBE_TCPTIMER_DURATION_MASK 0x000000FF + +/* HLREG0 Bit Masks */ +#define IXGBE_HLREG0_TXCRCEN 0x00000001 /* bit 0 */ +#define IXGBE_HLREG0_RXCRCSTRP 0x00000002 /* bit 1 */ +#define IXGBE_HLREG0_JUMBOEN 0x00000004 /* bit 2 */ +#define IXGBE_HLREG0_TXPADEN 0x00000400 /* bit 10 */ +#define IXGBE_HLREG0_TXPAUSEEN 0x00001000 /* bit 12 */ +#define IXGBE_HLREG0_RXPAUSEEN 0x00004000 /* bit 14 */ +#define IXGBE_HLREG0_LPBK 0x00008000 /* bit 15 */ +#define IXGBE_HLREG0_MDCSPD 0x00010000 /* bit 16 */ +#define IXGBE_HLREG0_CONTMDC 0x00020000 /* bit 17 */ +#define IXGBE_HLREG0_CTRLFLTR 0x00040000 /* bit 18 */ +#define IXGBE_HLREG0_PREPEND 0x00F00000 /* bits 20-23 */ +#define IXGBE_HLREG0_PRIPAUSEEN 0x01000000 /* bit 24 */ +#define IXGBE_HLREG0_RXPAUSERECDA 0x06000000 /* bits 25-26 */ +#define IXGBE_HLREG0_RXLNGTHERREN 0x08000000 /* bit 27 */ +#define IXGBE_HLREG0_RXPADSTRIPEN 0x10000000 /* bit 28 */ + +/* VMD_CTL bitmasks */ +#define IXGBE_VMD_CTL_VMDQ_EN 0x00000001 +#define IXGBE_VMD_CTL_VMDQ_FILTER 0x00000002 + +/* VT_CTL bitmasks */ +#define IXGBE_VT_CTL_DIS_DEFPL 0x20000000 /* disable default pool */ +#define IXGBE_VT_CTL_REPLEN 0x40000000 /* replication enabled */ +#define IXGBE_VT_CTL_VT_ENABLE 0x00000001 /* Enable VT Mode */ +#define IXGBE_VT_CTL_POOL_SHIFT 7 +#define IXGBE_VT_CTL_POOL_MASK (0x3F << IXGBE_VT_CTL_POOL_SHIFT) + +/* VMOLR bitmasks */ +#define IXGBE_VMOLR_AUPE 0x01000000 /* accept untagged packets */ +#define IXGBE_VMOLR_ROMPE 0x02000000 /* accept packets in MTA tbl */ +#define IXGBE_VMOLR_ROPE 0x04000000 /* accept packets in UC tbl */ +#define IXGBE_VMOLR_BAM 0x08000000 /* accept broadcast packets */ +#define IXGBE_VMOLR_MPE 0x10000000 /* multicast promiscuous */ + +/* VFRE bitmask */ +#define IXGBE_VFRE_ENABLE_ALL 0xFFFFFFFF + +#define IXGBE_VF_INIT_TIMEOUT 200 /* Number of retries to clear RSTI */ + +/* RDHMPN and TDHMPN bitmasks */ +#define IXGBE_RDHMPN_RDICADDR 0x007FF800 +#define IXGBE_RDHMPN_RDICRDREQ 0x00800000 +#define IXGBE_RDHMPN_RDICADDR_SHIFT 11 +#define IXGBE_TDHMPN_TDICADDR 0x003FF800 +#define IXGBE_TDHMPN_TDICRDREQ 0x00800000 +#define IXGBE_TDHMPN_TDICADDR_SHIFT 11 + +#define IXGBE_RDMAM_MEM_SEL_SHIFT 13 +#define IXGBE_RDMAM_DWORD_SHIFT 9 +#define IXGBE_RDMAM_DESC_COMP_FIFO 1 +#define IXGBE_RDMAM_DFC_CMD_FIFO 2 +#define IXGBE_RDMAM_RSC_HEADER_ADDR 3 +#define IXGBE_RDMAM_TCN_STATUS_RAM 4 +#define IXGBE_RDMAM_WB_COLL_FIFO 5 +#define IXGBE_RDMAM_QSC_CNT_RAM 6 +#define IXGBE_RDMAM_QSC_FCOE_RAM 7 +#define IXGBE_RDMAM_QSC_QUEUE_CNT 8 +#define IXGBE_RDMAM_QSC_QUEUE_RAM 0xA +#define IXGBE_RDMAM_QSC_RSC_RAM 0xB +#define IXGBE_RDMAM_DESC_COM_FIFO_RANGE 135 +#define IXGBE_RDMAM_DESC_COM_FIFO_COUNT 4 +#define IXGBE_RDMAM_DFC_CMD_FIFO_RANGE 48 +#define IXGBE_RDMAM_DFC_CMD_FIFO_COUNT 7 +#define IXGBE_RDMAM_RSC_HEADER_ADDR_RANGE 32 +#define IXGBE_RDMAM_RSC_HEADER_ADDR_COUNT 4 +#define IXGBE_RDMAM_TCN_STATUS_RAM_RANGE 256 +#define IXGBE_RDMAM_TCN_STATUS_RAM_COUNT 9 +#define IXGBE_RDMAM_WB_COLL_FIFO_RANGE 8 +#define IXGBE_RDMAM_WB_COLL_FIFO_COUNT 4 +#define IXGBE_RDMAM_QSC_CNT_RAM_RANGE 64 +#define IXGBE_RDMAM_QSC_CNT_RAM_COUNT 4 +#define IXGBE_RDMAM_QSC_FCOE_RAM_RANGE 512 +#define IXGBE_RDMAM_QSC_FCOE_RAM_COUNT 5 +#define IXGBE_RDMAM_QSC_QUEUE_CNT_RANGE 32 +#define IXGBE_RDMAM_QSC_QUEUE_CNT_COUNT 4 +#define IXGBE_RDMAM_QSC_QUEUE_RAM_RANGE 128 +#define IXGBE_RDMAM_QSC_QUEUE_RAM_COUNT 8 +#define IXGBE_RDMAM_QSC_RSC_RAM_RANGE 32 +#define IXGBE_RDMAM_QSC_RSC_RAM_COUNT 8 + +#define IXGBE_TXDESCIC_READY 0x80000000 + +/* Receive Checksum Control */ +#define IXGBE_RXCSUM_IPPCSE 0x00001000 /* IP payload checksum enable */ +#define IXGBE_RXCSUM_PCSD 0x00002000 /* packet checksum disabled */ + +/* FCRTL Bit Masks */ +#define IXGBE_FCRTL_XONE 0x80000000 /* XON enable */ +#define IXGBE_FCRTH_FCEN 0x80000000 /* Packet buffer fc enable */ + +/* PAP bit masks*/ +#define IXGBE_PAP_TXPAUSECNT_MASK 0x0000FFFF /* Pause counter mask */ + +/* RMCS Bit Masks */ +#define IXGBE_RMCS_RRM 0x00000002 /* Rx Recycle Mode enable */ +/* Receive Arbitration Control: 0 Round Robin, 1 DFP */ +#define IXGBE_RMCS_RAC 0x00000004 +/* Deficit Fixed Prio ena */ +#define IXGBE_RMCS_DFP IXGBE_RMCS_RAC +#define IXGBE_RMCS_TFCE_802_3X 0x00000008 /* Tx Priority FC ena */ +#define IXGBE_RMCS_TFCE_PRIORITY 0x00000010 /* Tx Priority FC ena */ +#define IXGBE_RMCS_ARBDIS 0x00000040 /* Arbitration disable bit */ + +/* FCCFG Bit Masks */ +#define IXGBE_FCCFG_TFCE_802_3X 0x00000008 /* Tx link FC enable */ +#define IXGBE_FCCFG_TFCE_PRIORITY 0x00000010 /* Tx priority FC enable */ + +/* Interrupt register bitmasks */ + +/* Extended Interrupt Cause Read */ +#define IXGBE_EICR_RTX_QUEUE 0x0000FFFF /* RTx Queue Interrupt */ +#define IXGBE_EICR_FLOW_DIR 0x00010000 /* FDir Exception */ +#define IXGBE_EICR_RX_MISS 0x00020000 /* Packet Buffer Overrun */ +#define IXGBE_EICR_PCI 0x00040000 /* PCI Exception */ +#define IXGBE_EICR_MAILBOX 0x00080000 /* VF to PF Mailbox Interrupt */ +#define IXGBE_EICR_LSC 0x00100000 /* Link Status Change */ +#define IXGBE_EICR_LINKSEC 0x00200000 /* PN Threshold */ +#define IXGBE_EICR_MNG 0x00400000 /* Manageability Event Interrupt */ +#define IXGBE_EICR_TS 0x00800000 /* Thermal Sensor Event */ +#define IXGBE_EICR_GPI_SDP0 0x01000000 /* Gen Purpose Interrupt on SDP0 */ +#define IXGBE_EICR_GPI_SDP1 0x02000000 /* Gen Purpose Interrupt on SDP1 */ +#define IXGBE_EICR_GPI_SDP2 0x04000000 /* Gen Purpose Interrupt on SDP2 */ +#define IXGBE_EICR_ECC 0x10000000 /* ECC Error */ +#define IXGBE_EICR_PBUR 0x10000000 /* Packet Buffer Handler Error */ +#define IXGBE_EICR_DHER 0x20000000 /* Descriptor Handler Error */ +#define IXGBE_EICR_TCP_TIMER 0x40000000 /* TCP Timer */ +#define IXGBE_EICR_OTHER 0x80000000 /* Interrupt Cause Active */ + +/* Extended Interrupt Cause Set */ +#define IXGBE_EICS_RTX_QUEUE IXGBE_EICR_RTX_QUEUE /* RTx Queue Interrupt */ +#define IXGBE_EICS_FLOW_DIR IXGBE_EICR_FLOW_DIR /* FDir Exception */ +#define IXGBE_EICS_RX_MISS IXGBE_EICR_RX_MISS /* Pkt Buffer Overrun */ +#define IXGBE_EICS_PCI IXGBE_EICR_PCI /* PCI Exception */ +#define IXGBE_EICS_MAILBOX IXGBE_EICR_MAILBOX /* VF to PF Mailbox Int */ +#define IXGBE_EICS_LSC IXGBE_EICR_LSC /* Link Status Change */ +#define IXGBE_EICS_MNG IXGBE_EICR_MNG /* MNG Event Interrupt */ +#define IXGBE_EICS_GPI_SDP0 IXGBE_EICR_GPI_SDP0 /* SDP0 Gen Purpose Int */ +#define IXGBE_EICS_GPI_SDP1 IXGBE_EICR_GPI_SDP1 /* SDP1 Gen Purpose Int */ +#define IXGBE_EICS_GPI_SDP2 IXGBE_EICR_GPI_SDP2 /* SDP2 Gen Purpose Int */ +#define IXGBE_EICS_ECC IXGBE_EICR_ECC /* ECC Error */ +#define IXGBE_EICS_PBUR IXGBE_EICR_PBUR /* Pkt Buf Handler Err */ +#define IXGBE_EICS_DHER IXGBE_EICR_DHER /* Desc Handler Error */ +#define IXGBE_EICS_TCP_TIMER IXGBE_EICR_TCP_TIMER /* TCP Timer */ +#define IXGBE_EICS_OTHER IXGBE_EICR_OTHER /* INT Cause Active */ + +/* Extended Interrupt Mask Set */ +#define IXGBE_EIMS_RTX_QUEUE IXGBE_EICR_RTX_QUEUE /* RTx Queue Interrupt */ +#define IXGBE_EIMS_FLOW_DIR IXGBE_EICR_FLOW_DIR /* FDir Exception */ +#define IXGBE_EIMS_RX_MISS IXGBE_EICR_RX_MISS /* Packet Buffer Overrun */ +#define IXGBE_EIMS_PCI IXGBE_EICR_PCI /* PCI Exception */ +#define IXGBE_EIMS_MAILBOX IXGBE_EICR_MAILBOX /* VF to PF Mailbox Int */ +#define IXGBE_EIMS_LSC IXGBE_EICR_LSC /* Link Status Change */ +#define IXGBE_EIMS_MNG IXGBE_EICR_MNG /* MNG Event Interrupt */ +#define IXGBE_EIMS_TS IXGBE_EICR_TS /* Thermal Sensor Event */ +#define IXGBE_EIMS_GPI_SDP0 IXGBE_EICR_GPI_SDP0 /* SDP0 Gen Purpose Int */ +#define IXGBE_EIMS_GPI_SDP1 IXGBE_EICR_GPI_SDP1 /* SDP1 Gen Purpose Int */ +#define IXGBE_EIMS_GPI_SDP2 IXGBE_EICR_GPI_SDP2 /* SDP2 Gen Purpose Int */ +#define IXGBE_EIMS_ECC IXGBE_EICR_ECC /* ECC Error */ +#define IXGBE_EIMS_PBUR IXGBE_EICR_PBUR /* Pkt Buf Handler Err */ +#define IXGBE_EIMS_DHER IXGBE_EICR_DHER /* Descr Handler Error */ +#define IXGBE_EIMS_TCP_TIMER IXGBE_EICR_TCP_TIMER /* TCP Timer */ +#define IXGBE_EIMS_OTHER IXGBE_EICR_OTHER /* INT Cause Active */ + +/* Extended Interrupt Mask Clear */ +#define IXGBE_EIMC_RTX_QUEUE IXGBE_EICR_RTX_QUEUE /* RTx Queue Interrupt */ +#define IXGBE_EIMC_FLOW_DIR IXGBE_EICR_FLOW_DIR /* FDir Exception */ +#define IXGBE_EIMC_RX_MISS IXGBE_EICR_RX_MISS /* Packet Buffer Overrun */ +#define IXGBE_EIMC_PCI IXGBE_EICR_PCI /* PCI Exception */ +#define IXGBE_EIMC_MAILBOX IXGBE_EICR_MAILBOX /* VF to PF Mailbox Int */ +#define IXGBE_EIMC_LSC IXGBE_EICR_LSC /* Link Status Change */ +#define IXGBE_EIMC_MNG IXGBE_EICR_MNG /* MNG Event Interrupt */ +#define IXGBE_EIMC_GPI_SDP0 IXGBE_EICR_GPI_SDP0 /* SDP0 Gen Purpose Int */ +#define IXGBE_EIMC_GPI_SDP1 IXGBE_EICR_GPI_SDP1 /* SDP1 Gen Purpose Int */ +#define IXGBE_EIMC_GPI_SDP2 IXGBE_EICR_GPI_SDP2 /* SDP2 Gen Purpose Int */ +#define IXGBE_EIMC_ECC IXGBE_EICR_ECC /* ECC Error */ +#define IXGBE_EIMC_PBUR IXGBE_EICR_PBUR /* Pkt Buf Handler Err */ +#define IXGBE_EIMC_DHER IXGBE_EICR_DHER /* Desc Handler Err */ +#define IXGBE_EIMC_TCP_TIMER IXGBE_EICR_TCP_TIMER /* TCP Timer */ +#define IXGBE_EIMC_OTHER IXGBE_EICR_OTHER /* INT Cause Active */ + +#define IXGBE_EIMS_ENABLE_MASK ( \ + IXGBE_EIMS_RTX_QUEUE | \ + IXGBE_EIMS_LSC | \ + IXGBE_EIMS_TCP_TIMER | \ + IXGBE_EIMS_OTHER) + +/* Immediate Interrupt Rx (A.K.A. Low Latency Interrupt) */ +#define IXGBE_IMIR_PORT_IM_EN 0x00010000 /* TCP port enable */ +#define IXGBE_IMIR_PORT_BP 0x00020000 /* TCP port check bypass */ +#define IXGBE_IMIREXT_SIZE_BP 0x00001000 /* Packet size bypass */ +#define IXGBE_IMIREXT_CTRL_URG 0x00002000 /* Check URG bit in header */ +#define IXGBE_IMIREXT_CTRL_ACK 0x00004000 /* Check ACK bit in header */ +#define IXGBE_IMIREXT_CTRL_PSH 0x00008000 /* Check PSH bit in header */ +#define IXGBE_IMIREXT_CTRL_RST 0x00010000 /* Check RST bit in header */ +#define IXGBE_IMIREXT_CTRL_SYN 0x00020000 /* Check SYN bit in header */ +#define IXGBE_IMIREXT_CTRL_FIN 0x00040000 /* Check FIN bit in header */ +#define IXGBE_IMIREXT_CTRL_BP 0x00080000 /* Bypass check of control bits */ +#define IXGBE_IMIR_SIZE_BP_82599 0x00001000 /* Packet size bypass */ +#define IXGBE_IMIR_CTRL_URG_82599 0x00002000 /* Check URG bit in header */ +#define IXGBE_IMIR_CTRL_ACK_82599 0x00004000 /* Check ACK bit in header */ +#define IXGBE_IMIR_CTRL_PSH_82599 0x00008000 /* Check PSH bit in header */ +#define IXGBE_IMIR_CTRL_RST_82599 0x00010000 /* Check RST bit in header */ +#define IXGBE_IMIR_CTRL_SYN_82599 0x00020000 /* Check SYN bit in header */ +#define IXGBE_IMIR_CTRL_FIN_82599 0x00040000 /* Check FIN bit in header */ +#define IXGBE_IMIR_CTRL_BP_82599 0x00080000 /* Bypass chk of ctrl bits */ +#define IXGBE_IMIR_LLI_EN_82599 0x00100000 /* Enables low latency Int */ +#define IXGBE_IMIR_RX_QUEUE_MASK_82599 0x0000007F /* Rx Queue Mask */ +#define IXGBE_IMIR_RX_QUEUE_SHIFT_82599 21 /* Rx Queue Shift */ +#define IXGBE_IMIRVP_PRIORITY_MASK 0x00000007 /* VLAN priority mask */ +#define IXGBE_IMIRVP_PRIORITY_EN 0x00000008 /* VLAN priority enable */ + +#define IXGBE_MAX_FTQF_FILTERS 128 +#define IXGBE_FTQF_PROTOCOL_MASK 0x00000003 +#define IXGBE_FTQF_PROTOCOL_TCP 0x00000000 +#define IXGBE_FTQF_PROTOCOL_UDP 0x00000001 +#define IXGBE_FTQF_PROTOCOL_SCTP 2 +#define IXGBE_FTQF_PRIORITY_MASK 0x00000007 +#define IXGBE_FTQF_PRIORITY_SHIFT 2 +#define IXGBE_FTQF_POOL_MASK 0x0000003F +#define IXGBE_FTQF_POOL_SHIFT 8 +#define IXGBE_FTQF_5TUPLE_MASK_MASK 0x0000001F +#define IXGBE_FTQF_5TUPLE_MASK_SHIFT 25 +#define IXGBE_FTQF_SOURCE_ADDR_MASK 0x1E +#define IXGBE_FTQF_DEST_ADDR_MASK 0x1D +#define IXGBE_FTQF_SOURCE_PORT_MASK 0x1B +#define IXGBE_FTQF_DEST_PORT_MASK 0x17 +#define IXGBE_FTQF_PROTOCOL_COMP_MASK 0x0F +#define IXGBE_FTQF_POOL_MASK_EN 0x40000000 +#define IXGBE_FTQF_QUEUE_ENABLE 0x80000000 + +/* Interrupt clear mask */ +#define IXGBE_IRQ_CLEAR_MASK 0xFFFFFFFF + +/* Interrupt Vector Allocation Registers */ +#define IXGBE_IVAR_REG_NUM 25 +#define IXGBE_IVAR_REG_NUM_82599 64 +#define IXGBE_IVAR_TXRX_ENTRY 96 +#define IXGBE_IVAR_RX_ENTRY 64 +#define IXGBE_IVAR_RX_QUEUE(_i) (0 + (_i)) +#define IXGBE_IVAR_TX_QUEUE(_i) (64 + (_i)) +#define IXGBE_IVAR_TX_ENTRY 32 + +#define IXGBE_IVAR_TCP_TIMER_INDEX 96 /* 0 based index */ +#define IXGBE_IVAR_OTHER_CAUSES_INDEX 97 /* 0 based index */ + +#define IXGBE_MSIX_VECTOR(_i) (0 + (_i)) + +#define IXGBE_IVAR_ALLOC_VAL 0x80 /* Interrupt Allocation valid */ + +/* ETYPE Queue Filter/Select Bit Masks */ +#define IXGBE_MAX_ETQF_FILTERS 8 +#define IXGBE_ETQF_FCOE 0x08000000 /* bit 27 */ +#define IXGBE_ETQF_BCN 0x10000000 /* bit 28 */ +#define IXGBE_ETQF_1588 0x40000000 /* bit 30 */ +#define IXGBE_ETQF_FILTER_EN 0x80000000 /* bit 31 */ +#define IXGBE_ETQF_POOL_ENABLE (1 << 26) /* bit 26 */ + +#define IXGBE_ETQS_RX_QUEUE 0x007F0000 /* bits 22:16 */ +#define IXGBE_ETQS_RX_QUEUE_SHIFT 16 +#define IXGBE_ETQS_LLI 0x20000000 /* bit 29 */ +#define IXGBE_ETQS_QUEUE_EN 0x80000000 /* bit 31 */ + +/* + * ETQF filter list: one static filter per filter consumer. This is + * to avoid filter collisions later. Add new filters + * here!! + * + * Current filters: + * EAPOL 802.1x (0x888e): Filter 0 + * FCoE (0x8906): Filter 2 + * 1588 (0x88f7): Filter 3 + * FIP (0x8914): Filter 4 + */ +#define IXGBE_ETQF_FILTER_EAPOL 0 +#define IXGBE_ETQF_FILTER_FCOE 2 +#define IXGBE_ETQF_FILTER_1588 3 +#define IXGBE_ETQF_FILTER_FIP 4 +/* VLAN Control Bit Masks */ +#define IXGBE_VLNCTRL_VET 0x0000FFFF /* bits 0-15 */ +#define IXGBE_VLNCTRL_CFI 0x10000000 /* bit 28 */ +#define IXGBE_VLNCTRL_CFIEN 0x20000000 /* bit 29 */ +#define IXGBE_VLNCTRL_VFE 0x40000000 /* bit 30 */ +#define IXGBE_VLNCTRL_VME 0x80000000 /* bit 31 */ + +/* VLAN pool filtering masks */ +#define IXGBE_VLVF_VIEN 0x80000000 /* filter is valid */ +#define IXGBE_VLVF_ENTRIES 64 +#define IXGBE_VLVF_VLANID_MASK 0x00000FFF +/* Per VF Port VLAN insertion rules */ +#define IXGBE_VMVIR_VLANA_DEFAULT 0x40000000 /* Always use default VLAN */ +#define IXGBE_VMVIR_VLANA_NEVER 0x80000000 /* Never insert VLAN tag */ + +#define IXGBE_ETHERNET_IEEE_VLAN_TYPE 0x8100 /* 802.1q protocol */ + +/* STATUS Bit Masks */ +#define IXGBE_STATUS_LAN_ID 0x0000000C /* LAN ID */ +#define IXGBE_STATUS_LAN_ID_SHIFT 2 /* LAN ID Shift*/ +#define IXGBE_STATUS_GIO 0x00080000 /* GIO Master Ena Status */ + +#define IXGBE_STATUS_LAN_ID_0 0x00000000 /* LAN ID 0 */ +#define IXGBE_STATUS_LAN_ID_1 0x00000004 /* LAN ID 1 */ + +/* ESDP Bit Masks */ +#define IXGBE_ESDP_SDP0 0x00000001 /* SDP0 Data Value */ +#define IXGBE_ESDP_SDP1 0x00000002 /* SDP1 Data Value */ +#define IXGBE_ESDP_SDP2 0x00000004 /* SDP2 Data Value */ +#define IXGBE_ESDP_SDP3 0x00000008 /* SDP3 Data Value */ +#define IXGBE_ESDP_SDP4 0x00000010 /* SDP4 Data Value */ +#define IXGBE_ESDP_SDP5 0x00000020 /* SDP5 Data Value */ +#define IXGBE_ESDP_SDP6 0x00000040 /* SDP6 Data Value */ +#define IXGBE_ESDP_SDP0_DIR 0x00000100 /* SDP0 IO direction */ +#define IXGBE_ESDP_SDP1_DIR 0x00000200 /* SDP1 IO direction */ +#define IXGBE_ESDP_SDP4_DIR 0x00001000 /* SDP4 IO direction */ +#define IXGBE_ESDP_SDP5_DIR 0x00002000 /* SDP5 IO direction */ +#define IXGBE_ESDP_SDP0_NATIVE 0x00010000 /* SDP0 IO mode */ +#define IXGBE_ESDP_SDP1_NATIVE 0x00020000 /* SDP1 IO mode */ + + +/* LEDCTL Bit Masks */ +#define IXGBE_LED_IVRT_BASE 0x00000040 +#define IXGBE_LED_BLINK_BASE 0x00000080 +#define IXGBE_LED_MODE_MASK_BASE 0x0000000F +#define IXGBE_LED_OFFSET(_base, _i) (_base << (8 * (_i))) +#define IXGBE_LED_MODE_SHIFT(_i) (8*(_i)) +#define IXGBE_LED_IVRT(_i) IXGBE_LED_OFFSET(IXGBE_LED_IVRT_BASE, _i) +#define IXGBE_LED_BLINK(_i) IXGBE_LED_OFFSET(IXGBE_LED_BLINK_BASE, _i) +#define IXGBE_LED_MODE_MASK(_i) IXGBE_LED_OFFSET(IXGBE_LED_MODE_MASK_BASE, _i) + +/* LED modes */ +#define IXGBE_LED_LINK_UP 0x0 +#define IXGBE_LED_LINK_10G 0x1 +#define IXGBE_LED_MAC 0x2 +#define IXGBE_LED_FILTER 0x3 +#define IXGBE_LED_LINK_ACTIVE 0x4 +#define IXGBE_LED_LINK_1G 0x5 +#define IXGBE_LED_ON 0xE +#define IXGBE_LED_OFF 0xF + +/* AUTOC Bit Masks */ +#define IXGBE_AUTOC_KX4_KX_SUPP_MASK 0xC0000000 +#define IXGBE_AUTOC_KX4_SUPP 0x80000000 +#define IXGBE_AUTOC_KX_SUPP 0x40000000 +#define IXGBE_AUTOC_PAUSE 0x30000000 +#define IXGBE_AUTOC_ASM_PAUSE 0x20000000 +#define IXGBE_AUTOC_SYM_PAUSE 0x10000000 +#define IXGBE_AUTOC_RF 0x08000000 +#define IXGBE_AUTOC_PD_TMR 0x06000000 +#define IXGBE_AUTOC_AN_RX_LOOSE 0x01000000 +#define IXGBE_AUTOC_AN_RX_DRIFT 0x00800000 +#define IXGBE_AUTOC_AN_RX_ALIGN 0x007C0000 +#define IXGBE_AUTOC_FECA 0x00040000 +#define IXGBE_AUTOC_FECR 0x00020000 +#define IXGBE_AUTOC_KR_SUPP 0x00010000 +#define IXGBE_AUTOC_AN_RESTART 0x00001000 +#define IXGBE_AUTOC_FLU 0x00000001 +#define IXGBE_AUTOC_LMS_SHIFT 13 +#define IXGBE_AUTOC_LMS_10G_SERIAL (0x3 << IXGBE_AUTOC_LMS_SHIFT) +#define IXGBE_AUTOC_LMS_KX4_KX_KR (0x4 << IXGBE_AUTOC_LMS_SHIFT) +#define IXGBE_AUTOC_LMS_SGMII_1G_100M (0x5 << IXGBE_AUTOC_LMS_SHIFT) +#define IXGBE_AUTOC_LMS_KX4_KX_KR_1G_AN (0x6 << IXGBE_AUTOC_LMS_SHIFT) +#define IXGBE_AUTOC_LMS_KX4_KX_KR_SGMII (0x7 << IXGBE_AUTOC_LMS_SHIFT) +#define IXGBE_AUTOC_LMS_MASK (0x7 << IXGBE_AUTOC_LMS_SHIFT) +#define IXGBE_AUTOC_LMS_1G_LINK_NO_AN (0x0 << IXGBE_AUTOC_LMS_SHIFT) +#define IXGBE_AUTOC_LMS_10G_LINK_NO_AN (0x1 << IXGBE_AUTOC_LMS_SHIFT) +#define IXGBE_AUTOC_LMS_1G_AN (0x2 << IXGBE_AUTOC_LMS_SHIFT) +#define IXGBE_AUTOC_LMS_KX4_AN (0x4 << IXGBE_AUTOC_LMS_SHIFT) +#define IXGBE_AUTOC_LMS_KX4_AN_1G_AN (0x6 << IXGBE_AUTOC_LMS_SHIFT) +#define IXGBE_AUTOC_LMS_ATTACH_TYPE (0x7 << IXGBE_AUTOC_10G_PMA_PMD_SHIFT) + +#define IXGBE_AUTOC_1G_PMA_PMD_MASK 0x00000200 +#define IXGBE_AUTOC_1G_PMA_PMD_SHIFT 9 +#define IXGBE_AUTOC_10G_PMA_PMD_MASK 0x00000180 +#define IXGBE_AUTOC_10G_PMA_PMD_SHIFT 7 +#define IXGBE_AUTOC_10G_XAUI (0x0 << IXGBE_AUTOC_10G_PMA_PMD_SHIFT) +#define IXGBE_AUTOC_10G_KX4 (0x1 << IXGBE_AUTOC_10G_PMA_PMD_SHIFT) +#define IXGBE_AUTOC_10G_CX4 (0x2 << IXGBE_AUTOC_10G_PMA_PMD_SHIFT) +#define IXGBE_AUTOC_1G_BX (0x0 << IXGBE_AUTOC_1G_PMA_PMD_SHIFT) +#define IXGBE_AUTOC_1G_KX (0x1 << IXGBE_AUTOC_1G_PMA_PMD_SHIFT) +#define IXGBE_AUTOC_1G_SFI (0x0 << IXGBE_AUTOC_1G_PMA_PMD_SHIFT) +#define IXGBE_AUTOC_1G_KX_BX (0x1 << IXGBE_AUTOC_1G_PMA_PMD_SHIFT) + +#define IXGBE_AUTOC2_UPPER_MASK 0xFFFF0000 +#define IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_MASK 0x00030000 +#define IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_SHIFT 16 +#define IXGBE_AUTOC2_10G_KR (0x0 << IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_SHIFT) +#define IXGBE_AUTOC2_10G_XFI (0x1 << IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_SHIFT) +#define IXGBE_AUTOC2_10G_SFI (0x2 << IXGBE_AUTOC2_10G_SERIAL_PMA_PMD_SHIFT) + +#define IXGBE_MACC_FLU 0x00000001 +#define IXGBE_MACC_FSV_10G 0x00030000 +#define IXGBE_MACC_FS 0x00040000 +#define IXGBE_MAC_RX2TX_LPBK 0x00000002 + +/* LINKS Bit Masks */ +#define IXGBE_LINKS_KX_AN_COMP 0x80000000 +#define IXGBE_LINKS_UP 0x40000000 +#define IXGBE_LINKS_SPEED 0x20000000 +#define IXGBE_LINKS_MODE 0x18000000 +#define IXGBE_LINKS_RX_MODE 0x06000000 +#define IXGBE_LINKS_TX_MODE 0x01800000 +#define IXGBE_LINKS_XGXS_EN 0x00400000 +#define IXGBE_LINKS_SGMII_EN 0x02000000 +#define IXGBE_LINKS_PCS_1G_EN 0x00200000 +#define IXGBE_LINKS_1G_AN_EN 0x00100000 +#define IXGBE_LINKS_KX_AN_IDLE 0x00080000 +#define IXGBE_LINKS_1G_SYNC 0x00040000 +#define IXGBE_LINKS_10G_ALIGN 0x00020000 +#define IXGBE_LINKS_10G_LANE_SYNC 0x00017000 +#define IXGBE_LINKS_TL_FAULT 0x00001000 +#define IXGBE_LINKS_SIGNAL 0x00000F00 + +#define IXGBE_LINKS_SPEED_82599 0x30000000 +#define IXGBE_LINKS_SPEED_10G_82599 0x30000000 +#define IXGBE_LINKS_SPEED_1G_82599 0x20000000 +#define IXGBE_LINKS_SPEED_100_82599 0x10000000 +#define IXGBE_LINK_UP_TIME 90 /* 9.0 Seconds */ +#define IXGBE_AUTO_NEG_TIME 45 /* 4.5 Seconds */ + +#define IXGBE_LINKS2_AN_SUPPORTED 0x00000040 + +/* PCS1GLSTA Bit Masks */ +#define IXGBE_PCS1GLSTA_LINK_OK 1 +#define IXGBE_PCS1GLSTA_SYNK_OK 0x10 +#define IXGBE_PCS1GLSTA_AN_COMPLETE 0x10000 +#define IXGBE_PCS1GLSTA_AN_PAGE_RX 0x20000 +#define IXGBE_PCS1GLSTA_AN_TIMED_OUT 0x40000 +#define IXGBE_PCS1GLSTA_AN_REMOTE_FAULT 0x80000 +#define IXGBE_PCS1GLSTA_AN_ERROR_RWS 0x100000 + +#define IXGBE_PCS1GANA_SYM_PAUSE 0x80 +#define IXGBE_PCS1GANA_ASM_PAUSE 0x100 + +/* PCS1GLCTL Bit Masks */ +#define IXGBE_PCS1GLCTL_AN_1G_TIMEOUT_EN 0x00040000 /* PCS 1G autoneg to en */ +#define IXGBE_PCS1GLCTL_FLV_LINK_UP 1 +#define IXGBE_PCS1GLCTL_FORCE_LINK 0x20 +#define IXGBE_PCS1GLCTL_LOW_LINK_LATCH 0x40 +#define IXGBE_PCS1GLCTL_AN_ENABLE 0x10000 +#define IXGBE_PCS1GLCTL_AN_RESTART 0x20000 + +/* ANLP1 Bit Masks */ +#define IXGBE_ANLP1_PAUSE 0x0C00 +#define IXGBE_ANLP1_SYM_PAUSE 0x0400 +#define IXGBE_ANLP1_ASM_PAUSE 0x0800 +#define IXGBE_ANLP1_AN_STATE_MASK 0x000f0000 + +/* SW Semaphore Register bitmasks */ +#define IXGBE_SWSM_SMBI 0x00000001 /* Driver Semaphore bit */ +#define IXGBE_SWSM_SWESMBI 0x00000002 /* FW Semaphore bit */ +#define IXGBE_SWSM_WMNG 0x00000004 /* Wake MNG Clock */ +#define IXGBE_SWFW_REGSMP 0x80000000 /* Register Semaphore bit 31 */ + +/* SW_FW_SYNC/GSSR definitions */ +#define IXGBE_GSSR_EEP_SM 0x0001 +#define IXGBE_GSSR_PHY0_SM 0x0002 +#define IXGBE_GSSR_PHY1_SM 0x0004 +#define IXGBE_GSSR_MAC_CSR_SM 0x0008 +#define IXGBE_GSSR_FLASH_SM 0x0010 +#define IXGBE_GSSR_SW_MNG_SM 0x0400 + +/* FW Status register bitmask */ +#define IXGBE_FWSTS_FWRI 0x00000200 /* Firmware Reset Indication */ + +/* EEC Register */ +#define IXGBE_EEC_SK 0x00000001 /* EEPROM Clock */ +#define IXGBE_EEC_CS 0x00000002 /* EEPROM Chip Select */ +#define IXGBE_EEC_DI 0x00000004 /* EEPROM Data In */ +#define IXGBE_EEC_DO 0x00000008 /* EEPROM Data Out */ +#define IXGBE_EEC_FWE_MASK 0x00000030 /* FLASH Write Enable */ +#define IXGBE_EEC_FWE_DIS 0x00000010 /* Disable FLASH writes */ +#define IXGBE_EEC_FWE_EN 0x00000020 /* Enable FLASH writes */ +#define IXGBE_EEC_FWE_SHIFT 4 +#define IXGBE_EEC_REQ 0x00000040 /* EEPROM Access Request */ +#define IXGBE_EEC_GNT 0x00000080 /* EEPROM Access Grant */ +#define IXGBE_EEC_PRES 0x00000100 /* EEPROM Present */ +#define IXGBE_EEC_ARD 0x00000200 /* EEPROM Auto Read Done */ +#define IXGBE_EEC_FLUP 0x00800000 /* Flash update command */ +#define IXGBE_EEC_SEC1VAL 0x02000000 /* Sector 1 Valid */ +#define IXGBE_EEC_FLUDONE 0x04000000 /* Flash update done */ +/* EEPROM Addressing bits based on type (0-small, 1-large) */ +#define IXGBE_EEC_ADDR_SIZE 0x00000400 +#define IXGBE_EEC_SIZE 0x00007800 /* EEPROM Size */ +#define IXGBE_EERD_MAX_ADDR 0x00003FFF /* EERD alows 14 bits for addr. */ + +#define IXGBE_EEC_SIZE_SHIFT 11 +#define IXGBE_EEPROM_WORD_SIZE_SHIFT 6 +#define IXGBE_EEPROM_OPCODE_BITS 8 + +/* Part Number String Length */ +#define IXGBE_PBANUM_LENGTH 11 + +/* Checksum and EEPROM pointers */ +#define IXGBE_PBANUM_PTR_GUARD 0xFAFA +#define IXGBE_EEPROM_CHECKSUM 0x3F +#define IXGBE_EEPROM_SUM 0xBABA +#define IXGBE_PCIE_ANALOG_PTR 0x03 +#define IXGBE_ATLAS0_CONFIG_PTR 0x04 +#define IXGBE_PHY_PTR 0x04 +#define IXGBE_ATLAS1_CONFIG_PTR 0x05 +#define IXGBE_OPTION_ROM_PTR 0x05 +#define IXGBE_PCIE_GENERAL_PTR 0x06 +#define IXGBE_PCIE_CONFIG0_PTR 0x07 +#define IXGBE_PCIE_CONFIG1_PTR 0x08 +#define IXGBE_CORE0_PTR 0x09 +#define IXGBE_CORE1_PTR 0x0A +#define IXGBE_MAC0_PTR 0x0B +#define IXGBE_MAC1_PTR 0x0C +#define IXGBE_CSR0_CONFIG_PTR 0x0D +#define IXGBE_CSR1_CONFIG_PTR 0x0E +#define IXGBE_FW_PTR 0x0F +#define IXGBE_PBANUM0_PTR 0x15 +#define IXGBE_PBANUM1_PTR 0x16 +#define IXGBE_ALT_MAC_ADDR_PTR 0x37 +#define IXGBE_FREE_SPACE_PTR 0X3E + +/* External Thermal Sensor Config */ +#define IXGBE_ETS_CFG 0x26 +#define IXGBE_ETS_LTHRES_DELTA_MASK 0x07C0 +#define IXGBE_ETS_LTHRES_DELTA_SHIFT 6 +#define IXGBE_ETS_TYPE_MASK 0x0038 +#define IXGBE_ETS_TYPE_SHIFT 3 +#define IXGBE_ETS_TYPE_EMC 0x000 +#define IXGBE_ETS_NUM_SENSORS_MASK 0x0007 +#define IXGBE_ETS_DATA_LOC_MASK 0x3C00 +#define IXGBE_ETS_DATA_LOC_SHIFT 10 +#define IXGBE_ETS_DATA_INDEX_MASK 0x0300 +#define IXGBE_ETS_DATA_INDEX_SHIFT 8 +#define IXGBE_ETS_DATA_HTHRESH_MASK 0x00FF + +#define IXGBE_SAN_MAC_ADDR_PTR 0x28 +#define IXGBE_DEVICE_CAPS 0x2C +#define IXGBE_SERIAL_NUMBER_MAC_ADDR 0x11 +#define IXGBE_PCIE_MSIX_82599_CAPS 0x72 +#define IXGBE_MAX_MSIX_VECTORS_82599 0x40 +#define IXGBE_PCIE_MSIX_82598_CAPS 0x62 +#define IXGBE_MAX_MSIX_VECTORS_82598 0x13 + +/* MSI-X capability fields masks */ +#define IXGBE_PCIE_MSIX_TBL_SZ_MASK 0x7FF + +/* Legacy EEPROM word offsets */ +#define IXGBE_ISCSI_BOOT_CAPS 0x0033 +#define IXGBE_ISCSI_SETUP_PORT_0 0x0030 +#define IXGBE_ISCSI_SETUP_PORT_1 0x0034 + +/* EEPROM Commands - SPI */ +#define IXGBE_EEPROM_MAX_RETRY_SPI 5000 /* Max wait 5ms for RDY signal */ +#define IXGBE_EEPROM_STATUS_RDY_SPI 0x01 +#define IXGBE_EEPROM_READ_OPCODE_SPI 0x03 /* EEPROM read opcode */ +#define IXGBE_EEPROM_WRITE_OPCODE_SPI 0x02 /* EEPROM write opcode */ +#define IXGBE_EEPROM_A8_OPCODE_SPI 0x08 /* opcode bit-3 = addr bit-8 */ +#define IXGBE_EEPROM_WREN_OPCODE_SPI 0x06 /* EEPROM set Write Ena latch */ +/* EEPROM reset Write Enable latch */ +#define IXGBE_EEPROM_WRDI_OPCODE_SPI 0x04 +#define IXGBE_EEPROM_RDSR_OPCODE_SPI 0x05 /* EEPROM read Status reg */ +#define IXGBE_EEPROM_WRSR_OPCODE_SPI 0x01 /* EEPROM write Status reg */ +#define IXGBE_EEPROM_ERASE4K_OPCODE_SPI 0x20 /* EEPROM ERASE 4KB */ +#define IXGBE_EEPROM_ERASE64K_OPCODE_SPI 0xD8 /* EEPROM ERASE 64KB */ +#define IXGBE_EEPROM_ERASE256_OPCODE_SPI 0xDB /* EEPROM ERASE 256B */ + +/* EEPROM Read Register */ +#define IXGBE_EEPROM_RW_REG_DATA 16 /* data offset in EEPROM read reg */ +#define IXGBE_EEPROM_RW_REG_DONE 2 /* Offset to READ done bit */ +#define IXGBE_EEPROM_RW_REG_START 1 /* First bit to start operation */ +#define IXGBE_EEPROM_RW_ADDR_SHIFT 2 /* Shift to the address bits */ +#define IXGBE_NVM_POLL_WRITE 1 /* Flag for polling for wr complete */ +#define IXGBE_NVM_POLL_READ 0 /* Flag for polling for rd complete */ + +#define IXGBE_ETH_LENGTH_OF_ADDRESS 6 + +#define IXGBE_EEPROM_PAGE_SIZE_MAX 128 +#define IXGBE_EEPROM_RD_BUFFER_MAX_COUNT 512 /* words rd in burst */ +#define IXGBE_EEPROM_WR_BUFFER_MAX_COUNT 256 /* words wr in burst */ + +#ifndef IXGBE_EEPROM_GRANT_ATTEMPTS +#define IXGBE_EEPROM_GRANT_ATTEMPTS 1000 /* EEPROM attempts to gain grant */ +#endif + +#ifndef IXGBE_EERD_EEWR_ATTEMPTS +/* Number of 5 microseconds we wait for EERD read and + * EERW write to complete */ +#define IXGBE_EERD_EEWR_ATTEMPTS 100000 +#endif + +#ifndef IXGBE_FLUDONE_ATTEMPTS +/* # attempts we wait for flush update to complete */ +#define IXGBE_FLUDONE_ATTEMPTS 20000 +#endif + +#define IXGBE_PCIE_CTRL2 0x5 /* PCIe Control 2 Offset */ +#define IXGBE_PCIE_CTRL2_DUMMY_ENABLE 0x8 /* Dummy Function Enable */ +#define IXGBE_PCIE_CTRL2_LAN_DISABLE 0x2 /* LAN PCI Disable */ +#define IXGBE_PCIE_CTRL2_DISABLE_SELECT 0x1 /* LAN Disable Select */ + +#define IXGBE_SAN_MAC_ADDR_PORT0_OFFSET 0x0 +#define IXGBE_SAN_MAC_ADDR_PORT1_OFFSET 0x3 +#define IXGBE_DEVICE_CAPS_ALLOW_ANY_SFP 0x1 +#define IXGBE_DEVICE_CAPS_FCOE_OFFLOADS 0x2 +#define IXGBE_FW_LESM_PARAMETERS_PTR 0x2 +#define IXGBE_FW_LESM_STATE_1 0x1 +#define IXGBE_FW_LESM_STATE_ENABLED 0x8000 /* LESM Enable bit */ +#define IXGBE_FW_PASSTHROUGH_PATCH_CONFIG_PTR 0x4 +#define IXGBE_FW_PATCH_VERSION_4 0x7 +#define IXGBE_FCOE_IBA_CAPS_BLK_PTR 0x33 /* iSCSI/FCOE block */ +#define IXGBE_FCOE_IBA_CAPS_FCOE 0x20 /* FCOE flags */ +#define IXGBE_ISCSI_FCOE_BLK_PTR 0x17 /* iSCSI/FCOE block */ +#define IXGBE_ISCSI_FCOE_FLAGS_OFFSET 0x0 /* FCOE flags */ +#define IXGBE_ISCSI_FCOE_FLAGS_ENABLE 0x1 /* FCOE flags enable bit */ +#define IXGBE_ALT_SAN_MAC_ADDR_BLK_PTR 0x27 /* Alt. SAN MAC block */ +#define IXGBE_ALT_SAN_MAC_ADDR_CAPS_OFFSET 0x0 /* Alt SAN MAC capability */ +#define IXGBE_ALT_SAN_MAC_ADDR_PORT0_OFFSET 0x1 /* Alt SAN MAC 0 offset */ +#define IXGBE_ALT_SAN_MAC_ADDR_PORT1_OFFSET 0x4 /* Alt SAN MAC 1 offset */ +#define IXGBE_ALT_SAN_MAC_ADDR_WWNN_OFFSET 0x7 /* Alt WWNN prefix offset */ +#define IXGBE_ALT_SAN_MAC_ADDR_WWPN_OFFSET 0x8 /* Alt WWPN prefix offset */ +#define IXGBE_ALT_SAN_MAC_ADDR_CAPS_SANMAC 0x0 /* Alt SAN MAC exists */ +#define IXGBE_ALT_SAN_MAC_ADDR_CAPS_ALTWWN 0x1 /* Alt WWN base exists */ + +#define IXGBE_DEVICE_CAPS_WOL_PORT0_1 0x4 /* WoL supported on ports 0 & 1 */ +#define IXGBE_DEVICE_CAPS_WOL_PORT0 0x8 /* WoL supported on port 0 */ +#define IXGBE_DEVICE_CAPS_WOL_MASK 0xC /* Mask for WoL capabilities */ + +/* PCI Bus Info */ +#define IXGBE_PCI_DEVICE_STATUS 0xAA +#define IXGBE_PCI_DEVICE_STATUS_TRANSACTION_PENDING 0x0020 +#define IXGBE_PCI_LINK_STATUS 0xB2 +#define IXGBE_PCI_DEVICE_CONTROL2 0xC8 +#define IXGBE_PCI_LINK_WIDTH 0x3F0 +#define IXGBE_PCI_LINK_WIDTH_1 0x10 +#define IXGBE_PCI_LINK_WIDTH_2 0x20 +#define IXGBE_PCI_LINK_WIDTH_4 0x40 +#define IXGBE_PCI_LINK_WIDTH_8 0x80 +#define IXGBE_PCI_LINK_SPEED 0xF +#define IXGBE_PCI_LINK_SPEED_2500 0x1 +#define IXGBE_PCI_LINK_SPEED_5000 0x2 +#define IXGBE_PCI_LINK_SPEED_8000 0x3 +#define IXGBE_PCI_HEADER_TYPE_REGISTER 0x0E +#define IXGBE_PCI_HEADER_TYPE_MULTIFUNC 0x80 +#define IXGBE_PCI_DEVICE_CONTROL2_16ms 0x0005 + +/* Number of 100 microseconds we wait for PCI Express master disable */ +#define IXGBE_PCI_MASTER_DISABLE_TIMEOUT 800 + +/* Check whether address is multicast. This is little-endian specific check.*/ +#define IXGBE_IS_MULTICAST(Address) \ + (bool)(((u8 *)(Address))[0] & ((u8)0x01)) + +/* Check whether an address is broadcast. */ +#define IXGBE_IS_BROADCAST(Address) \ + ((((u8 *)(Address))[0] == ((u8)0xff)) && \ + (((u8 *)(Address))[1] == ((u8)0xff))) + +/* RAH */ +#define IXGBE_RAH_VIND_MASK 0x003C0000 +#define IXGBE_RAH_VIND_SHIFT 18 +#define IXGBE_RAH_AV 0x80000000 +#define IXGBE_CLEAR_VMDQ_ALL 0xFFFFFFFF + +/* Header split receive */ +#define IXGBE_RFCTL_ISCSI_DIS 0x00000001 +#define IXGBE_RFCTL_ISCSI_DWC_MASK 0x0000003E +#define IXGBE_RFCTL_ISCSI_DWC_SHIFT 1 +#define IXGBE_RFCTL_RSC_DIS 0x00000010 +#define IXGBE_RFCTL_NFSW_DIS 0x00000040 +#define IXGBE_RFCTL_NFSR_DIS 0x00000080 +#define IXGBE_RFCTL_NFS_VER_MASK 0x00000300 +#define IXGBE_RFCTL_NFS_VER_SHIFT 8 +#define IXGBE_RFCTL_NFS_VER_2 0 +#define IXGBE_RFCTL_NFS_VER_3 1 +#define IXGBE_RFCTL_NFS_VER_4 2 +#define IXGBE_RFCTL_IPV6_DIS 0x00000400 +#define IXGBE_RFCTL_IPV6_XSUM_DIS 0x00000800 +#define IXGBE_RFCTL_IPFRSP_DIS 0x00004000 +#define IXGBE_RFCTL_IPV6_EX_DIS 0x00010000 +#define IXGBE_RFCTL_NEW_IPV6_EXT_DIS 0x00020000 + +/* Transmit Config masks */ +#define IXGBE_TXDCTL_ENABLE 0x02000000 /* Ena specific Tx Queue */ +#define IXGBE_TXDCTL_SWFLSH 0x04000000 /* Tx Desc. wr-bk flushing */ +#define IXGBE_TXDCTL_WTHRESH_SHIFT 16 /* shift to WTHRESH bits */ +/* Enable short packet padding to 64 bytes */ +#define IXGBE_TX_PAD_ENABLE 0x00000400 +#define IXGBE_JUMBO_FRAME_ENABLE 0x00000004 /* Allow jumbo frames */ +/* This allows for 16K packets + 4k for vlan */ +#define IXGBE_MAX_FRAME_SZ 0x40040000 + +#define IXGBE_TDWBAL_HEAD_WB_ENABLE 0x1 /* Tx head write-back enable */ +#define IXGBE_TDWBAL_SEQNUM_WB_ENABLE 0x2 /* Tx seq# write-back enable */ + +/* Receive Config masks */ +#define IXGBE_RXCTRL_RXEN 0x00000001 /* Enable Receiver */ +#define IXGBE_RXCTRL_DMBYPS 0x00000002 /* Desc Monitor Bypass */ +#define IXGBE_RXDCTL_ENABLE 0x02000000 /* Ena specific Rx Queue */ +#define IXGBE_RXDCTL_SWFLSH 0x04000000 /* Rx Desc wr-bk flushing */ +#define IXGBE_RXDCTL_RLPMLMASK 0x00003FFF /* X540 supported only */ +#define IXGBE_RXDCTL_RLPML_EN 0x00008000 +#define IXGBE_RXDCTL_VME 0x40000000 /* VLAN mode enable */ + +#define IXGBE_TSYNCTXCTL_VALID 0x00000001 /* Tx timestamp valid */ +#define IXGBE_TSYNCTXCTL_ENABLED 0x00000010 /* Tx timestamping enabled */ + +#define IXGBE_TSYNCRXCTL_VALID 0x00000001 /* Rx timestamp valid */ +#define IXGBE_TSYNCRXCTL_TYPE_MASK 0x0000000E /* Rx type mask */ +#define IXGBE_TSYNCRXCTL_TYPE_L2_V2 0x00 +#define IXGBE_TSYNCRXCTL_TYPE_L4_V1 0x02 +#define IXGBE_TSYNCRXCTL_TYPE_L2_L4_V2 0x04 +#define IXGBE_TSYNCRXCTL_TYPE_EVENT_V2 0x0A +#define IXGBE_TSYNCRXCTL_ENABLED 0x00000010 /* Rx Timestamping enabled */ + +#define IXGBE_RXMTRL_V1_CTRLT_MASK 0x000000FF +#define IXGBE_RXMTRL_V1_SYNC_MSG 0x00 +#define IXGBE_RXMTRL_V1_DELAY_REQ_MSG 0x01 +#define IXGBE_RXMTRL_V1_FOLLOWUP_MSG 0x02 +#define IXGBE_RXMTRL_V1_DELAY_RESP_MSG 0x03 +#define IXGBE_RXMTRL_V1_MGMT_MSG 0x04 + +#define IXGBE_RXMTRL_V2_MSGID_MASK 0x0000FF00 +#define IXGBE_RXMTRL_V2_SYNC_MSG 0x0000 +#define IXGBE_RXMTRL_V2_DELAY_REQ_MSG 0x0100 +#define IXGBE_RXMTRL_V2_PDELAY_REQ_MSG 0x0200 +#define IXGBE_RXMTRL_V2_PDELAY_RESP_MSG 0x0300 +#define IXGBE_RXMTRL_V2_FOLLOWUP_MSG 0x0800 +#define IXGBE_RXMTRL_V2_DELAY_RESP_MSG 0x0900 +#define IXGBE_RXMTRL_V2_PDELAY_FOLLOWUP_MSG 0x0A00 +#define IXGBE_RXMTRL_V2_ANNOUNCE_MSG 0x0B00 +#define IXGBE_RXMTRL_V2_SIGNALLING_MSG 0x0C00 +#define IXGBE_RXMTRL_V2_MGMT_MSG 0x0D00 + +#define IXGBE_FCTRL_SBP 0x00000002 /* Store Bad Packet */ +#define IXGBE_FCTRL_MPE 0x00000100 /* Multicast Promiscuous Ena*/ +#define IXGBE_FCTRL_UPE 0x00000200 /* Unicast Promiscuous Ena */ +#define IXGBE_FCTRL_BAM 0x00000400 /* Broadcast Accept Mode */ +#define IXGBE_FCTRL_PMCF 0x00001000 /* Pass MAC Control Frames */ +#define IXGBE_FCTRL_DPF 0x00002000 /* Discard Pause Frame */ +/* Receive Priority Flow Control Enable */ +#define IXGBE_FCTRL_RPFCE 0x00004000 +#define IXGBE_FCTRL_RFCE 0x00008000 /* Receive Flow Control Ena */ +#define IXGBE_MFLCN_PMCF 0x00000001 /* Pass MAC Control Frames */ +#define IXGBE_MFLCN_DPF 0x00000002 /* Discard Pause Frame */ +#define IXGBE_MFLCN_RPFCE 0x00000004 /* Receive Priority FC Enable */ +#define IXGBE_MFLCN_RFCE 0x00000008 /* Receive FC Enable */ +#define IXGBE_MFLCN_RPFCE_MASK 0x00000FF4 /* Rx Priority FC bitmap mask */ +#define IXGBE_MFLCN_RPFCE_SHIFT 4 /* Rx Priority FC bitmap shift */ + +/* Multiple Receive Queue Control */ +#define IXGBE_MRQC_RSSEN 0x00000001 /* RSS Enable */ +#define IXGBE_MRQC_MRQE_MASK 0xF /* Bits 3:0 */ +#define IXGBE_MRQC_RT8TCEN 0x00000002 /* 8 TC no RSS */ +#define IXGBE_MRQC_RT4TCEN 0x00000003 /* 4 TC no RSS */ +#define IXGBE_MRQC_RTRSS8TCEN 0x00000004 /* 8 TC w/ RSS */ +#define IXGBE_MRQC_RTRSS4TCEN 0x00000005 /* 4 TC w/ RSS */ +#define IXGBE_MRQC_VMDQEN 0x00000008 /* VMDq2 64 pools no RSS */ +#define IXGBE_MRQC_VMDQRSS32EN 0x0000000A /* VMDq2 32 pools w/ RSS */ +#define IXGBE_MRQC_VMDQRSS64EN 0x0000000B /* VMDq2 64 pools w/ RSS */ +#define IXGBE_MRQC_VMDQRT8TCEN 0x0000000C /* VMDq2/RT 16 pool 8 TC */ +#define IXGBE_MRQC_VMDQRT4TCEN 0x0000000D /* VMDq2/RT 32 pool 4 TC */ +#define IXGBE_MRQC_RSS_FIELD_MASK 0xFFFF0000 +#define IXGBE_MRQC_RSS_FIELD_IPV4_TCP 0x00010000 +#define IXGBE_MRQC_RSS_FIELD_IPV4 0x00020000 +#define IXGBE_MRQC_RSS_FIELD_IPV6_EX_TCP 0x00040000 +#define IXGBE_MRQC_RSS_FIELD_IPV6_EX 0x00080000 +#define IXGBE_MRQC_RSS_FIELD_IPV6 0x00100000 +#define IXGBE_MRQC_RSS_FIELD_IPV6_TCP 0x00200000 +#define IXGBE_MRQC_RSS_FIELD_IPV4_UDP 0x00400000 +#define IXGBE_MRQC_RSS_FIELD_IPV6_UDP 0x00800000 +#define IXGBE_MRQC_RSS_FIELD_IPV6_EX_UDP 0x01000000 +#define IXGBE_MRQC_L3L4TXSWEN 0x00008000 + +/* Queue Drop Enable */ +#define IXGBE_QDE_ENABLE 0x00000001 +#define IXGBE_QDE_IDX_MASK 0x00007F00 +#define IXGBE_QDE_IDX_SHIFT 8 +#define IXGBE_QDE_WRITE 0x00010000 +#define IXGBE_QDE_READ 0x00020000 + +#define IXGBE_TXD_POPTS_IXSM 0x01 /* Insert IP checksum */ +#define IXGBE_TXD_POPTS_TXSM 0x02 /* Insert TCP/UDP checksum */ +#define IXGBE_TXD_CMD_EOP 0x01000000 /* End of Packet */ +#define IXGBE_TXD_CMD_IFCS 0x02000000 /* Insert FCS (Ethernet CRC) */ +#define IXGBE_TXD_CMD_IC 0x04000000 /* Insert Checksum */ +#define IXGBE_TXD_CMD_RS 0x08000000 /* Report Status */ +#define IXGBE_TXD_CMD_DEXT 0x20000000 /* Desc extension (0 = legacy) */ +#define IXGBE_TXD_CMD_VLE 0x40000000 /* Add VLAN tag */ +#define IXGBE_TXD_STAT_DD 0x00000001 /* Descriptor Done */ + +#define IXGBE_RXDADV_IPSEC_STATUS_SECP 0x00020000 +#define IXGBE_RXDADV_IPSEC_ERROR_INVALID_PROTOCOL 0x08000000 +#define IXGBE_RXDADV_IPSEC_ERROR_INVALID_LENGTH 0x10000000 +#define IXGBE_RXDADV_IPSEC_ERROR_AUTH_FAILED 0x18000000 +#define IXGBE_RXDADV_IPSEC_ERROR_BIT_MASK 0x18000000 +/* Multiple Transmit Queue Command Register */ +#define IXGBE_MTQC_RT_ENA 0x1 /* DCB Enable */ +#define IXGBE_MTQC_VT_ENA 0x2 /* VMDQ2 Enable */ +#define IXGBE_MTQC_64Q_1PB 0x0 /* 64 queues 1 pack buffer */ +#define IXGBE_MTQC_32VF 0x8 /* 4 TX Queues per pool w/32VF's */ +#define IXGBE_MTQC_64VF 0x4 /* 2 TX Queues per pool w/64VF's */ +#define IXGBE_MTQC_4TC_4TQ 0x8 /* 4 TC if RT_ENA and VT_ENA */ +#define IXGBE_MTQC_8TC_8TQ 0xC /* 8 TC if RT_ENA or 8 TQ if VT_ENA */ + +/* Receive Descriptor bit definitions */ +#define IXGBE_RXD_STAT_DD 0x01 /* Descriptor Done */ +#define IXGBE_RXD_STAT_EOP 0x02 /* End of Packet */ +#define IXGBE_RXD_STAT_FLM 0x04 /* FDir Match */ +#define IXGBE_RXD_STAT_VP 0x08 /* IEEE VLAN Packet */ +#define IXGBE_RXDADV_NEXTP_MASK 0x000FFFF0 /* Next Descriptor Index */ +#define IXGBE_RXDADV_NEXTP_SHIFT 0x00000004 +#define IXGBE_RXD_STAT_UDPCS 0x10 /* UDP xsum calculated */ +#define IXGBE_RXD_STAT_L4CS 0x20 /* L4 xsum calculated */ +#define IXGBE_RXD_STAT_IPCS 0x40 /* IP xsum calculated */ +#define IXGBE_RXD_STAT_PIF 0x80 /* passed in-exact filter */ +#define IXGBE_RXD_STAT_CRCV 0x100 /* Speculative CRC Valid */ +#define IXGBE_RXD_STAT_VEXT 0x200 /* 1st VLAN found */ +#define IXGBE_RXD_STAT_UDPV 0x400 /* Valid UDP checksum */ +#define IXGBE_RXD_STAT_DYNINT 0x800 /* Pkt caused INT via DYNINT */ +#define IXGBE_RXD_STAT_LLINT 0x800 /* Pkt caused Low Latency Interrupt */ +#define IXGBE_RXD_STAT_TS 0x10000 /* Time Stamp */ +#define IXGBE_RXD_STAT_SECP 0x20000 /* Security Processing */ +#define IXGBE_RXD_STAT_LB 0x40000 /* Loopback Status */ +#define IXGBE_RXD_STAT_ACK 0x8000 /* ACK Packet indication */ +#define IXGBE_RXD_ERR_CE 0x01 /* CRC Error */ +#define IXGBE_RXD_ERR_LE 0x02 /* Length Error */ +#define IXGBE_RXD_ERR_PE 0x08 /* Packet Error */ +#define IXGBE_RXD_ERR_OSE 0x10 /* Oversize Error */ +#define IXGBE_RXD_ERR_USE 0x20 /* Undersize Error */ +#define IXGBE_RXD_ERR_TCPE 0x40 /* TCP/UDP Checksum Error */ +#define IXGBE_RXD_ERR_IPE 0x80 /* IP Checksum Error */ +#define IXGBE_RXDADV_ERR_MASK 0xfff00000 /* RDESC.ERRORS mask */ +#define IXGBE_RXDADV_ERR_SHIFT 20 /* RDESC.ERRORS shift */ +#define IXGBE_RXDADV_ERR_RXE 0x20000000 /* Any MAC Error */ +#define IXGBE_RXDADV_ERR_FCEOFE 0x80000000 /* FCoEFe/IPE */ +#define IXGBE_RXDADV_ERR_FCERR 0x00700000 /* FCERR/FDIRERR */ +#define IXGBE_RXDADV_ERR_FDIR_LEN 0x00100000 /* FDIR Length error */ +#define IXGBE_RXDADV_ERR_FDIR_DROP 0x00200000 /* FDIR Drop error */ +#define IXGBE_RXDADV_ERR_FDIR_COLL 0x00400000 /* FDIR Collision error */ +#define IXGBE_RXDADV_ERR_HBO 0x00800000 /*Header Buffer Overflow */ +#define IXGBE_RXDADV_ERR_CE 0x01000000 /* CRC Error */ +#define IXGBE_RXDADV_ERR_LE 0x02000000 /* Length Error */ +#define IXGBE_RXDADV_ERR_PE 0x08000000 /* Packet Error */ +#define IXGBE_RXDADV_ERR_OSE 0x10000000 /* Oversize Error */ +#define IXGBE_RXDADV_ERR_USE 0x20000000 /* Undersize Error */ +#define IXGBE_RXDADV_ERR_TCPE 0x40000000 /* TCP/UDP Checksum Error */ +#define IXGBE_RXDADV_ERR_IPE 0x80000000 /* IP Checksum Error */ +#define IXGBE_RXD_VLAN_ID_MASK 0x0FFF /* VLAN ID is in lower 12 bits */ +#define IXGBE_RXD_PRI_MASK 0xE000 /* Priority is in upper 3 bits */ +#define IXGBE_RXD_PRI_SHIFT 13 +#define IXGBE_RXD_CFI_MASK 0x1000 /* CFI is bit 12 */ +#define IXGBE_RXD_CFI_SHIFT 12 + +#define IXGBE_RXDADV_STAT_DD IXGBE_RXD_STAT_DD /* Done */ +#define IXGBE_RXDADV_STAT_EOP IXGBE_RXD_STAT_EOP /* End of Packet */ +#define IXGBE_RXDADV_STAT_FLM IXGBE_RXD_STAT_FLM /* FDir Match */ +#define IXGBE_RXDADV_STAT_VP IXGBE_RXD_STAT_VP /* IEEE VLAN Pkt */ +#define IXGBE_RXDADV_STAT_MASK 0x000fffff /* Stat/NEXTP: bit 0-19 */ +#define IXGBE_RXDADV_STAT_FCEOFS 0x00000040 /* FCoE EOF/SOF Stat */ +#define IXGBE_RXDADV_STAT_FCSTAT 0x00000030 /* FCoE Pkt Stat */ +#define IXGBE_RXDADV_STAT_FCSTAT_NOMTCH 0x00000000 /* 00: No Ctxt Match */ +#define IXGBE_RXDADV_STAT_FCSTAT_NODDP 0x00000010 /* 01: Ctxt w/o DDP */ +#define IXGBE_RXDADV_STAT_FCSTAT_FCPRSP 0x00000020 /* 10: Recv. FCP_RSP */ +#define IXGBE_RXDADV_STAT_FCSTAT_DDP 0x00000030 /* 11: Ctxt w/ DDP */ +#define IXGBE_RXDADV_STAT_TS 0x00010000 /* IEEE1588 Time Stamp */ + +/* PSRTYPE bit definitions */ +#define IXGBE_PSRTYPE_TCPHDR 0x00000010 +#define IXGBE_PSRTYPE_UDPHDR 0x00000020 +#define IXGBE_PSRTYPE_IPV4HDR 0x00000100 +#define IXGBE_PSRTYPE_IPV6HDR 0x00000200 +#define IXGBE_PSRTYPE_L2HDR 0x00001000 + +/* SRRCTL bit definitions */ +#define IXGBE_SRRCTL_BSIZEPKT_SHIFT 10 /* so many KBs */ +#define IXGBE_SRRCTL_RDMTS_SHIFT 22 +#define IXGBE_SRRCTL_RDMTS_MASK 0x01C00000 +#define IXGBE_SRRCTL_DROP_EN 0x10000000 +#define IXGBE_SRRCTL_BSIZEPKT_MASK 0x0000007F +#define IXGBE_SRRCTL_BSIZEHDR_MASK 0x00003F00 +#define IXGBE_SRRCTL_DESCTYPE_LEGACY 0x00000000 +#define IXGBE_SRRCTL_DESCTYPE_ADV_ONEBUF 0x02000000 +#define IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT 0x04000000 +#define IXGBE_SRRCTL_DESCTYPE_HDR_REPLICATION_LARGE_PKT 0x08000000 +#define IXGBE_SRRCTL_DESCTYPE_HDR_SPLIT_ALWAYS 0x0A000000 +#define IXGBE_SRRCTL_DESCTYPE_MASK 0x0E000000 + +#define IXGBE_RXDPS_HDRSTAT_HDRSP 0x00008000 +#define IXGBE_RXDPS_HDRSTAT_HDRLEN_MASK 0x000003FF + +#define IXGBE_RXDADV_RSSTYPE_MASK 0x0000000F +#define IXGBE_RXDADV_PKTTYPE_MASK 0x0000FFF0 +#define IXGBE_RXDADV_PKTTYPE_MASK_EX 0x0001FFF0 +#define IXGBE_RXDADV_HDRBUFLEN_MASK 0x00007FE0 +#define IXGBE_RXDADV_RSCCNT_MASK 0x001E0000 +#define IXGBE_RXDADV_RSCCNT_SHIFT 17 +#define IXGBE_RXDADV_HDRBUFLEN_SHIFT 5 +#define IXGBE_RXDADV_SPLITHEADER_EN 0x00001000 +#define IXGBE_RXDADV_SPH 0x8000 + +/* RSS Hash results */ +#define IXGBE_RXDADV_RSSTYPE_NONE 0x00000000 +#define IXGBE_RXDADV_RSSTYPE_IPV4_TCP 0x00000001 +#define IXGBE_RXDADV_RSSTYPE_IPV4 0x00000002 +#define IXGBE_RXDADV_RSSTYPE_IPV6_TCP 0x00000003 +#define IXGBE_RXDADV_RSSTYPE_IPV6_EX 0x00000004 +#define IXGBE_RXDADV_RSSTYPE_IPV6 0x00000005 +#define IXGBE_RXDADV_RSSTYPE_IPV6_TCP_EX 0x00000006 +#define IXGBE_RXDADV_RSSTYPE_IPV4_UDP 0x00000007 +#define IXGBE_RXDADV_RSSTYPE_IPV6_UDP 0x00000008 +#define IXGBE_RXDADV_RSSTYPE_IPV6_UDP_EX 0x00000009 + +/* RSS Packet Types as indicated in the receive descriptor. */ +#define IXGBE_RXDADV_PKTTYPE_NONE 0x00000000 +#define IXGBE_RXDADV_PKTTYPE_IPV4 0x00000010 /* IPv4 hdr present */ +#define IXGBE_RXDADV_PKTTYPE_IPV4_EX 0x00000020 /* IPv4 hdr + extensions */ +#define IXGBE_RXDADV_PKTTYPE_IPV6 0x00000040 /* IPv6 hdr present */ +#define IXGBE_RXDADV_PKTTYPE_IPV6_EX 0x00000080 /* IPv6 hdr + extensions */ +#define IXGBE_RXDADV_PKTTYPE_TCP 0x00000100 /* TCP hdr present */ +#define IXGBE_RXDADV_PKTTYPE_UDP 0x00000200 /* UDP hdr present */ +#define IXGBE_RXDADV_PKTTYPE_SCTP 0x00000400 /* SCTP hdr present */ +#define IXGBE_RXDADV_PKTTYPE_NFS 0x00000800 /* NFS hdr present */ +#define IXGBE_RXDADV_PKTTYPE_IPSEC_ESP 0x00001000 /* IPSec ESP */ +#define IXGBE_RXDADV_PKTTYPE_IPSEC_AH 0x00002000 /* IPSec AH */ +#define IXGBE_RXDADV_PKTTYPE_LINKSEC 0x00004000 /* LinkSec Encap */ +#define IXGBE_RXDADV_PKTTYPE_ETQF 0x00008000 /* PKTTYPE is ETQF index */ +#define IXGBE_RXDADV_PKTTYPE_ETQF_MASK 0x00000070 /* ETQF has 8 indices */ +#define IXGBE_RXDADV_PKTTYPE_ETQF_SHIFT 4 /* Right-shift 4 bits */ + +/* Security Processing bit Indication */ +#define IXGBE_RXDADV_LNKSEC_STATUS_SECP 0x00020000 +#define IXGBE_RXDADV_LNKSEC_ERROR_NO_SA_MATCH 0x08000000 +#define IXGBE_RXDADV_LNKSEC_ERROR_REPLAY_ERROR 0x10000000 +#define IXGBE_RXDADV_LNKSEC_ERROR_BIT_MASK 0x18000000 +#define IXGBE_RXDADV_LNKSEC_ERROR_BAD_SIG 0x18000000 + +/* Masks to determine if packets should be dropped due to frame errors */ +#define IXGBE_RXD_ERR_FRAME_ERR_MASK ( \ + IXGBE_RXD_ERR_CE | \ + IXGBE_RXD_ERR_LE | \ + IXGBE_RXD_ERR_PE | \ + IXGBE_RXD_ERR_OSE | \ + IXGBE_RXD_ERR_USE) + +#define IXGBE_RXDADV_ERR_FRAME_ERR_MASK ( \ + IXGBE_RXDADV_ERR_CE | \ + IXGBE_RXDADV_ERR_LE | \ + IXGBE_RXDADV_ERR_PE | \ + IXGBE_RXDADV_ERR_OSE | \ + IXGBE_RXDADV_ERR_USE) + +#define IXGBE_RXDADV_ERR_FRAME_ERR_MASK_82599 IXGBE_RXDADV_ERR_RXE + +/* Multicast bit mask */ +#define IXGBE_MCSTCTRL_MFE 0x4 + +/* Number of Transmit and Receive Descriptors must be a multiple of 8 */ +#define IXGBE_REQ_TX_DESCRIPTOR_MULTIPLE 8 +#define IXGBE_REQ_RX_DESCRIPTOR_MULTIPLE 8 +#define IXGBE_REQ_TX_BUFFER_GRANULARITY 1024 + +/* Vlan-specific macros */ +#define IXGBE_RX_DESC_SPECIAL_VLAN_MASK 0x0FFF /* VLAN ID in lower 12 bits */ +#define IXGBE_RX_DESC_SPECIAL_PRI_MASK 0xE000 /* Priority in upper 3 bits */ +#define IXGBE_RX_DESC_SPECIAL_PRI_SHIFT 0x000D /* Priority in upper 3 of 16 */ +#define IXGBE_TX_DESC_SPECIAL_PRI_SHIFT IXGBE_RX_DESC_SPECIAL_PRI_SHIFT + +/* SR-IOV specific macros */ +#define IXGBE_MBVFICR_INDEX(vf_number) (vf_number >> 4) +#define IXGBE_MBVFICR(_i) (0x00710 + ((_i) * 4)) +#define IXGBE_VFLRE(_i) (((_i & 1) ? 0x001C0 : 0x00600)) +#define IXGBE_VFLREC(_i) (0x00700 + ((_i) * 4)) +/* Translated register #defines */ +#define IXGBE_PVFCTRL(P) (0x00300 + (4 * (P))) +#define IXGBE_PVFSTATUS(P) (0x00008 + (0 * (P))) +#define IXGBE_PVFLINKS(P) (0x042A4 + (0 * (P))) +#define IXGBE_PVFRTIMER(P) (0x00048 + (0 * (P))) +#define IXGBE_PVFMAILBOX(P) (0x04C00 + (4 * (P))) +#define IXGBE_PVFRXMEMWRAP(P) (0x03190 + (0 * (P))) +#define IXGBE_PVTEICR(P) (0x00B00 + (4 * (P))) +#define IXGBE_PVTEICS(P) (0x00C00 + (4 * (P))) +#define IXGBE_PVTEIMS(P) (0x00D00 + (4 * (P))) +#define IXGBE_PVTEIMC(P) (0x00E00 + (4 * (P))) +#define IXGBE_PVTEIAC(P) (0x00F00 + (4 * (P))) +#define IXGBE_PVTEIAM(P) (0x04D00 + (4 * (P))) +#define IXGBE_PVTEITR(P) (((P) < 24) ? (0x00820 + ((P) * 4)) : \ + (0x012300 + (((P) - 24) * 4))) +#define IXGBE_PVTIVAR(P) (0x12500 + (4 * (P))) +#define IXGBE_PVTIVAR_MISC(P) (0x04E00 + (4 * (P))) +#define IXGBE_PVTRSCINT(P) (0x12000 + (4 * (P))) +#define IXGBE_VFPBACL(P) (0x110C8 + (4 * (P))) +#define IXGBE_PVFRDBAL(P) ((P < 64) ? (0x01000 + (0x40 * (P))) \ + : (0x0D000 + (0x40 * ((P) - 64)))) +#define IXGBE_PVFRDBAH(P) ((P < 64) ? (0x01004 + (0x40 * (P))) \ + : (0x0D004 + (0x40 * ((P) - 64)))) +#define IXGBE_PVFRDLEN(P) ((P < 64) ? (0x01008 + (0x40 * (P))) \ + : (0x0D008 + (0x40 * ((P) - 64)))) +#define IXGBE_PVFRDH(P) ((P < 64) ? (0x01010 + (0x40 * (P))) \ + : (0x0D010 + (0x40 * ((P) - 64)))) +#define IXGBE_PVFRDT(P) ((P < 64) ? (0x01018 + (0x40 * (P))) \ + : (0x0D018 + (0x40 * ((P) - 64)))) +#define IXGBE_PVFRXDCTL(P) ((P < 64) ? (0x01028 + (0x40 * (P))) \ + : (0x0D028 + (0x40 * ((P) - 64)))) +#define IXGBE_PVFSRRCTL(P) ((P < 64) ? (0x01014 + (0x40 * (P))) \ + : (0x0D014 + (0x40 * ((P) - 64)))) +#define IXGBE_PVFPSRTYPE(P) (0x0EA00 + (4 * (P))) +#define IXGBE_PVFTDBAL(P) (0x06000 + (0x40 * (P))) +#define IXGBE_PVFTDBAH(P) (0x06004 + (0x40 * (P))) +#define IXGBE_PVFTTDLEN(P) (0x06008 + (0x40 * (P))) +#define IXGBE_PVFTDH(P) (0x06010 + (0x40 * (P))) +#define IXGBE_PVFTDT(P) (0x06018 + (0x40 * (P))) +#define IXGBE_PVFTXDCTL(P) (0x06028 + (0x40 * (P))) +#define IXGBE_PVFTDWBAL(P) (0x06038 + (0x40 * (P))) +#define IXGBE_PVFTDWBAH(P) (0x0603C + (0x40 * (P))) +#define IXGBE_PVFDCA_RXCTRL(P) (((P) < 64) ? (0x0100C + (0x40 * (P))) \ + : (0x0D00C + (0x40 * ((P) - 64)))) +#define IXGBE_PVFDCA_TXCTRL(P) (0x0600C + (0x40 * (P))) +#define IXGBE_PVFGPRC(x) (0x0101C + (0x40 * (x))) +#define IXGBE_PVFGPTC(x) (0x08300 + (0x04 * (x))) +#define IXGBE_PVFGORC_LSB(x) (0x01020 + (0x40 * (x))) +#define IXGBE_PVFGORC_MSB(x) (0x0D020 + (0x40 * (x))) +#define IXGBE_PVFGOTC_LSB(x) (0x08400 + (0x08 * (x))) +#define IXGBE_PVFGOTC_MSB(x) (0x08404 + (0x08 * (x))) +#define IXGBE_PVFMPRC(x) (0x0D01C + (0x40 * (x))) + +#define IXGBE_PVFTDWBALn(q_per_pool, vf_number, vf_q_index) \ + (IXGBE_PVFTDWBAL((q_per_pool)*(vf_number) + (vf_q_index))) +#define IXGBE_PVFTDWBAHn(q_per_pool, vf_number, vf_q_index) \ + (IXGBE_PVFTDWBAH((q_per_pool)*(vf_number) + (vf_q_index))) + +/* Little Endian defines */ +#ifndef __le16 +#define __le16 u16 +#endif +#ifndef __le32 +#define __le32 u32 +#endif +#ifndef __le64 +#define __le64 u64 + +#endif +#ifndef __be16 +/* Big Endian defines */ +#define __be16 u16 +#define __be32 u32 +#define __be64 u64 + +#endif +enum ixgbe_fdir_pballoc_type { + IXGBE_FDIR_PBALLOC_NONE = 0, + IXGBE_FDIR_PBALLOC_64K = 1, + IXGBE_FDIR_PBALLOC_128K = 2, + IXGBE_FDIR_PBALLOC_256K = 3, +}; + +/* Flow Director register values */ +#define IXGBE_FDIRCTRL_PBALLOC_64K 0x00000001 +#define IXGBE_FDIRCTRL_PBALLOC_128K 0x00000002 +#define IXGBE_FDIRCTRL_PBALLOC_256K 0x00000003 +#define IXGBE_FDIRCTRL_INIT_DONE 0x00000008 +#define IXGBE_FDIRCTRL_PERFECT_MATCH 0x00000010 +#define IXGBE_FDIRCTRL_REPORT_STATUS 0x00000020 +#define IXGBE_FDIRCTRL_REPORT_STATUS_ALWAYS 0x00000080 +#define IXGBE_FDIRCTRL_DROP_Q_SHIFT 8 +#define IXGBE_FDIRCTRL_FLEX_SHIFT 16 +#define IXGBE_FDIRCTRL_SEARCHLIM 0x00800000 +#define IXGBE_FDIRCTRL_MAX_LENGTH_SHIFT 24 +#define IXGBE_FDIRCTRL_FULL_THRESH_MASK 0xF0000000 +#define IXGBE_FDIRCTRL_FULL_THRESH_SHIFT 28 + +#define IXGBE_FDIRTCPM_DPORTM_SHIFT 16 +#define IXGBE_FDIRUDPM_DPORTM_SHIFT 16 +#define IXGBE_FDIRIP6M_DIPM_SHIFT 16 +#define IXGBE_FDIRM_VLANID 0x00000001 +#define IXGBE_FDIRM_VLANP 0x00000002 +#define IXGBE_FDIRM_POOL 0x00000004 +#define IXGBE_FDIRM_L4P 0x00000008 +#define IXGBE_FDIRM_FLEX 0x00000010 +#define IXGBE_FDIRM_DIPv6 0x00000020 + +#define IXGBE_FDIRFREE_FREE_MASK 0xFFFF +#define IXGBE_FDIRFREE_FREE_SHIFT 0 +#define IXGBE_FDIRFREE_COLL_MASK 0x7FFF0000 +#define IXGBE_FDIRFREE_COLL_SHIFT 16 +#define IXGBE_FDIRLEN_MAXLEN_MASK 0x3F +#define IXGBE_FDIRLEN_MAXLEN_SHIFT 0 +#define IXGBE_FDIRLEN_MAXHASH_MASK 0x7FFF0000 +#define IXGBE_FDIRLEN_MAXHASH_SHIFT 16 +#define IXGBE_FDIRUSTAT_ADD_MASK 0xFFFF +#define IXGBE_FDIRUSTAT_ADD_SHIFT 0 +#define IXGBE_FDIRUSTAT_REMOVE_MASK 0xFFFF0000 +#define IXGBE_FDIRUSTAT_REMOVE_SHIFT 16 +#define IXGBE_FDIRFSTAT_FADD_MASK 0x00FF +#define IXGBE_FDIRFSTAT_FADD_SHIFT 0 +#define IXGBE_FDIRFSTAT_FREMOVE_MASK 0xFF00 +#define IXGBE_FDIRFSTAT_FREMOVE_SHIFT 8 +#define IXGBE_FDIRPORT_DESTINATION_SHIFT 16 +#define IXGBE_FDIRVLAN_FLEX_SHIFT 16 +#define IXGBE_FDIRHASH_BUCKET_VALID_SHIFT 15 +#define IXGBE_FDIRHASH_SIG_SW_INDEX_SHIFT 16 + +#define IXGBE_FDIRCMD_CMD_MASK 0x00000003 +#define IXGBE_FDIRCMD_CMD_ADD_FLOW 0x00000001 +#define IXGBE_FDIRCMD_CMD_REMOVE_FLOW 0x00000002 +#define IXGBE_FDIRCMD_CMD_QUERY_REM_FILT 0x00000003 +#define IXGBE_FDIRCMD_FILTER_VALID 0x00000004 +#define IXGBE_FDIRCMD_FILTER_UPDATE 0x00000008 +#define IXGBE_FDIRCMD_IPv6DMATCH 0x00000010 +#define IXGBE_FDIRCMD_L4TYPE_UDP 0x00000020 +#define IXGBE_FDIRCMD_L4TYPE_TCP 0x00000040 +#define IXGBE_FDIRCMD_L4TYPE_SCTP 0x00000060 +#define IXGBE_FDIRCMD_IPV6 0x00000080 +#define IXGBE_FDIRCMD_CLEARHT 0x00000100 +#define IXGBE_FDIRCMD_DROP 0x00000200 +#define IXGBE_FDIRCMD_INT 0x00000400 +#define IXGBE_FDIRCMD_LAST 0x00000800 +#define IXGBE_FDIRCMD_COLLISION 0x00001000 +#define IXGBE_FDIRCMD_QUEUE_EN 0x00008000 +#define IXGBE_FDIRCMD_FLOW_TYPE_SHIFT 5 +#define IXGBE_FDIRCMD_RX_QUEUE_SHIFT 16 +#define IXGBE_FDIRCMD_VT_POOL_SHIFT 24 +#define IXGBE_FDIR_INIT_DONE_POLL 10 +#define IXGBE_FDIRCMD_CMD_POLL 10 + +#define IXGBE_FDIR_DROP_QUEUE 127 + +#define IXGBE_STATUS_OVERHEATING_BIT 20 /* STATUS overtemp bit num */ + +/* Manageablility Host Interface defines */ +#define IXGBE_HI_MAX_BLOCK_BYTE_LENGTH 1792 /* Num of bytes in range */ +#define IXGBE_HI_MAX_BLOCK_DWORD_LENGTH 448 /* Num of dwords in range */ +#define IXGBE_HI_COMMAND_TIMEOUT 500 /* Process HI command limit */ + +/* CEM Support */ +#define FW_CEM_HDR_LEN 0x4 +#define FW_CEM_CMD_DRIVER_INFO 0xDD +#define FW_CEM_CMD_DRIVER_INFO_LEN 0x5 +#define FW_CEM_CMD_RESERVED 0X0 +#define FW_CEM_UNUSED_VER 0x0 +#define FW_CEM_MAX_RETRIES 3 +#define FW_CEM_RESP_STATUS_SUCCESS 0x1 + +/* Host Interface Command Structures */ + +struct ixgbe_hic_hdr { + u8 cmd; + u8 buf_len; + union { + u8 cmd_resv; + u8 ret_status; + } cmd_or_resp; + u8 checksum; +}; + +struct ixgbe_hic_drv_info { + struct ixgbe_hic_hdr hdr; + u8 port_num; + u8 ver_sub; + u8 ver_build; + u8 ver_min; + u8 ver_maj; + u8 pad; /* end spacing to ensure length is mult. of dword */ + u16 pad2; /* end spacing to ensure length is mult. of dword2 */ +}; + +/* Transmit Descriptor - Legacy */ +struct ixgbe_legacy_tx_desc { + u64 buffer_addr; /* Address of the descriptor's data buffer */ + union { + __le32 data; + struct { + __le16 length; /* Data buffer length */ + u8 cso; /* Checksum offset */ + u8 cmd; /* Descriptor control */ + } flags; + } lower; + union { + __le32 data; + struct { + u8 status; /* Descriptor status */ + u8 css; /* Checksum start */ + __le16 vlan; + } fields; + } upper; +}; + +/* Transmit Descriptor - Advanced */ +union ixgbe_adv_tx_desc { + struct { + __le64 buffer_addr; /* Address of descriptor's data buf */ + __le32 cmd_type_len; + __le32 olinfo_status; + } read; + struct { + __le64 rsvd; /* Reserved */ + __le32 nxtseq_seed; + __le32 status; + } wb; +}; + +/* Receive Descriptor - Legacy */ +struct ixgbe_legacy_rx_desc { + __le64 buffer_addr; /* Address of the descriptor's data buffer */ + __le16 length; /* Length of data DMAed into data buffer */ + __le16 csum; /* Packet checksum */ + u8 status; /* Descriptor status */ + u8 errors; /* Descriptor Errors */ + __le16 vlan; +}; + +/* Receive Descriptor - Advanced */ +union ixgbe_adv_rx_desc { + struct { + __le64 pkt_addr; /* Packet buffer address */ + __le64 hdr_addr; /* Header buffer address */ + } read; + struct { + struct { + union { + __le32 data; + struct { + __le16 pkt_info; /* RSS, Pkt type */ + __le16 hdr_info; /* Splithdr, hdrlen */ + } hs_rss; + } lo_dword; + union { + __le32 rss; /* RSS Hash */ + struct { + __le16 ip_id; /* IP id */ + __le16 csum; /* Packet Checksum */ + } csum_ip; + } hi_dword; + } lower; + struct { + __le32 status_error; /* ext status/error */ + __le16 length; /* Packet length */ + __le16 vlan; /* VLAN tag */ + } upper; + } wb; /* writeback */ +}; + +/* Context descriptors */ +struct ixgbe_adv_tx_context_desc { + __le32 vlan_macip_lens; + __le32 seqnum_seed; + __le32 type_tucmd_mlhl; + __le32 mss_l4len_idx; +}; + +/* Adv Transmit Descriptor Config Masks */ +#define IXGBE_ADVTXD_DTALEN_MASK 0x0000FFFF /* Data buf length(bytes) */ +#define IXGBE_ADVTXD_MAC_LINKSEC 0x00040000 /* Insert LinkSec */ +#define IXGBE_ADVTXD_MAC_TSTAMP 0x00080000 /* IEEE1588 time stamp */ +#define IXGBE_ADVTXD_IPSEC_SA_INDEX_MASK 0x000003FF /* IPSec SA index */ +#define IXGBE_ADVTXD_IPSEC_ESP_LEN_MASK 0x000001FF /* IPSec ESP length */ +#define IXGBE_ADVTXD_DTYP_MASK 0x00F00000 /* DTYP mask */ +#define IXGBE_ADVTXD_DTYP_CTXT 0x00200000 /* Adv Context Desc */ +#define IXGBE_ADVTXD_DTYP_DATA 0x00300000 /* Adv Data Descriptor */ +#define IXGBE_ADVTXD_DCMD_EOP IXGBE_TXD_CMD_EOP /* End of Packet */ +#define IXGBE_ADVTXD_DCMD_IFCS IXGBE_TXD_CMD_IFCS /* Insert FCS */ +#define IXGBE_ADVTXD_DCMD_RS IXGBE_TXD_CMD_RS /* Report Status */ +#define IXGBE_ADVTXD_DCMD_DDTYP_ISCSI 0x10000000 /* DDP hdr type or iSCSI */ +#define IXGBE_ADVTXD_DCMD_DEXT IXGBE_TXD_CMD_DEXT /* Desc ext 1=Adv */ +#define IXGBE_ADVTXD_DCMD_VLE IXGBE_TXD_CMD_VLE /* VLAN pkt enable */ +#define IXGBE_ADVTXD_DCMD_TSE 0x80000000 /* TCP Seg enable */ +#define IXGBE_ADVTXD_STAT_DD IXGBE_TXD_STAT_DD /* Descriptor Done */ +#define IXGBE_ADVTXD_STAT_SN_CRC 0x00000002 /* NXTSEQ/SEED pres in WB */ +#define IXGBE_ADVTXD_STAT_RSV 0x0000000C /* STA Reserved */ +#define IXGBE_ADVTXD_IDX_SHIFT 4 /* Adv desc Index shift */ +#define IXGBE_ADVTXD_CC 0x00000080 /* Check Context */ +#define IXGBE_ADVTXD_POPTS_SHIFT 8 /* Adv desc POPTS shift */ +#define IXGBE_ADVTXD_POPTS_IXSM (IXGBE_TXD_POPTS_IXSM << \ + IXGBE_ADVTXD_POPTS_SHIFT) +#define IXGBE_ADVTXD_POPTS_TXSM (IXGBE_TXD_POPTS_TXSM << \ + IXGBE_ADVTXD_POPTS_SHIFT) +#define IXGBE_ADVTXD_POPTS_ISCO_1ST 0x00000000 /* 1st TSO of iSCSI PDU */ +#define IXGBE_ADVTXD_POPTS_ISCO_MDL 0x00000800 /* Middle TSO of iSCSI PDU */ +#define IXGBE_ADVTXD_POPTS_ISCO_LAST 0x00001000 /* Last TSO of iSCSI PDU */ +/* 1st&Last TSO-full iSCSI PDU */ +#define IXGBE_ADVTXD_POPTS_ISCO_FULL 0x00001800 +#define IXGBE_ADVTXD_POPTS_RSV 0x00002000 /* POPTS Reserved */ +#define IXGBE_ADVTXD_PAYLEN_SHIFT 14 /* Adv desc PAYLEN shift */ +#define IXGBE_ADVTXD_MACLEN_SHIFT 9 /* Adv ctxt desc mac len shift */ +#define IXGBE_ADVTXD_VLAN_SHIFT 16 /* Adv ctxt vlan tag shift */ +#define IXGBE_ADVTXD_TUCMD_IPV4 0x00000400 /* IP Packet Type: 1=IPv4 */ +#define IXGBE_ADVTXD_TUCMD_IPV6 0x00000000 /* IP Packet Type: 0=IPv6 */ +#define IXGBE_ADVTXD_TUCMD_L4T_UDP 0x00000000 /* L4 Packet TYPE of UDP */ +#define IXGBE_ADVTXD_TUCMD_L4T_TCP 0x00000800 /* L4 Packet TYPE of TCP */ +#define IXGBE_ADVTXD_TUCMD_L4T_SCTP 0x00001000 /* L4 Packet TYPE of SCTP */ +#define IXGBE_ADVTXD_TUCMD_MKRREQ 0x00002000 /* req Markers and CRC */ +#define IXGBE_ADVTXD_POPTS_IPSEC 0x00000400 /* IPSec offload request */ +#define IXGBE_ADVTXD_TUCMD_IPSEC_TYPE_ESP 0x00002000 /* IPSec Type ESP */ +#define IXGBE_ADVTXD_TUCMD_IPSEC_ENCRYPT_EN 0x00004000/* ESP Encrypt Enable */ +#define IXGBE_ADVTXT_TUCMD_FCOE 0x00008000 /* FCoE Frame Type */ +#define IXGBE_ADVTXD_FCOEF_EOF_MASK (0x3 << 10) /* FC EOF index */ +#define IXGBE_ADVTXD_FCOEF_SOF ((1 << 2) << 10) /* FC SOF index */ +#define IXGBE_ADVTXD_FCOEF_PARINC ((1 << 3) << 10) /* Rel_Off in F_CTL */ +#define IXGBE_ADVTXD_FCOEF_ORIE ((1 << 4) << 10) /* Orientation End */ +#define IXGBE_ADVTXD_FCOEF_ORIS ((1 << 5) << 10) /* Orientation Start */ +#define IXGBE_ADVTXD_FCOEF_EOF_N (0x0 << 10) /* 00: EOFn */ +#define IXGBE_ADVTXD_FCOEF_EOF_T (0x1 << 10) /* 01: EOFt */ +#define IXGBE_ADVTXD_FCOEF_EOF_NI (0x2 << 10) /* 10: EOFni */ +#define IXGBE_ADVTXD_FCOEF_EOF_A (0x3 << 10) /* 11: EOFa */ +#define IXGBE_ADVTXD_L4LEN_SHIFT 8 /* Adv ctxt L4LEN shift */ +#define IXGBE_ADVTXD_MSS_SHIFT 16 /* Adv ctxt MSS shift */ + +/* Autonegotiation advertised speeds */ +typedef u32 ixgbe_autoneg_advertised; +/* Link speed */ +typedef u32 ixgbe_link_speed; +#define IXGBE_LINK_SPEED_UNKNOWN 0 +#define IXGBE_LINK_SPEED_100_FULL 0x0008 +#define IXGBE_LINK_SPEED_1GB_FULL 0x0020 +#define IXGBE_LINK_SPEED_10GB_FULL 0x0080 +#define IXGBE_LINK_SPEED_82598_AUTONEG (IXGBE_LINK_SPEED_1GB_FULL | \ + IXGBE_LINK_SPEED_10GB_FULL) +#define IXGBE_LINK_SPEED_82599_AUTONEG (IXGBE_LINK_SPEED_100_FULL | \ + IXGBE_LINK_SPEED_1GB_FULL | \ + IXGBE_LINK_SPEED_10GB_FULL) + + +/* Physical layer type */ +typedef u32 ixgbe_physical_layer; +#define IXGBE_PHYSICAL_LAYER_UNKNOWN 0 +#define IXGBE_PHYSICAL_LAYER_10GBASE_T 0x0001 +#define IXGBE_PHYSICAL_LAYER_1000BASE_T 0x0002 +#define IXGBE_PHYSICAL_LAYER_100BASE_TX 0x0004 +#define IXGBE_PHYSICAL_LAYER_SFP_PLUS_CU 0x0008 +#define IXGBE_PHYSICAL_LAYER_10GBASE_LR 0x0010 +#define IXGBE_PHYSICAL_LAYER_10GBASE_LRM 0x0020 +#define IXGBE_PHYSICAL_LAYER_10GBASE_SR 0x0040 +#define IXGBE_PHYSICAL_LAYER_10GBASE_KX4 0x0080 +#define IXGBE_PHYSICAL_LAYER_10GBASE_CX4 0x0100 +#define IXGBE_PHYSICAL_LAYER_1000BASE_KX 0x0200 +#define IXGBE_PHYSICAL_LAYER_1000BASE_BX 0x0400 +#define IXGBE_PHYSICAL_LAYER_10GBASE_KR 0x0800 +#define IXGBE_PHYSICAL_LAYER_10GBASE_XAUI 0x1000 +#define IXGBE_PHYSICAL_LAYER_SFP_ACTIVE_DA 0x2000 +#define IXGBE_PHYSICAL_LAYER_1000BASE_SX 0x4000 + +/* Flow Control Data Sheet defined values + * Calculation and defines taken from 802.1bb Annex O + */ + +/* BitTimes (BT) conversion */ +#define IXGBE_BT2KB(BT) ((BT + (8 * 1024 - 1)) / (8 * 1024)) +#define IXGBE_B2BT(BT) (BT * 8) + +/* Calculate Delay to respond to PFC */ +#define IXGBE_PFC_D 672 + +/* Calculate Cable Delay */ +#define IXGBE_CABLE_DC 5556 /* Delay Copper */ +#define IXGBE_CABLE_DO 5000 /* Delay Optical */ + +/* Calculate Interface Delay X540 */ +#define IXGBE_PHY_DC 25600 /* Delay 10G BASET */ +#define IXGBE_MAC_DC 8192 /* Delay Copper XAUI interface */ +#define IXGBE_XAUI_DC (2 * 2048) /* Delay Copper Phy */ + +#define IXGBE_ID_X540 (IXGBE_MAC_DC + IXGBE_XAUI_DC + IXGBE_PHY_DC) + +/* Calculate Interface Delay 82598, 82599 */ +#define IXGBE_PHY_D 12800 +#define IXGBE_MAC_D 4096 +#define IXGBE_XAUI_D (2 * 1024) + +#define IXGBE_ID (IXGBE_MAC_D + IXGBE_XAUI_D + IXGBE_PHY_D) + +/* Calculate Delay incurred from higher layer */ +#define IXGBE_HD 6144 + +/* Calculate PCI Bus delay for low thresholds */ +#define IXGBE_PCI_DELAY 10000 + +/* Calculate X540 delay value in bit times */ +#define IXGBE_DV_X540(_max_frame_link, _max_frame_tc) \ + ((36 * \ + (IXGBE_B2BT(_max_frame_link) + \ + IXGBE_PFC_D + \ + (2 * IXGBE_CABLE_DC) + \ + (2 * IXGBE_ID_X540) + \ + IXGBE_HD) / 25 + 1) + \ + 2 * IXGBE_B2BT(_max_frame_tc)) + +/* Calculate 82599, 82598 delay value in bit times */ +#define IXGBE_DV(_max_frame_link, _max_frame_tc) \ + ((36 * \ + (IXGBE_B2BT(_max_frame_link) + \ + IXGBE_PFC_D + \ + (2 * IXGBE_CABLE_DC) + \ + (2 * IXGBE_ID) + \ + IXGBE_HD) / 25 + 1) + \ + 2 * IXGBE_B2BT(_max_frame_tc)) + +/* Calculate low threshold delay values */ +#define IXGBE_LOW_DV_X540(_max_frame_tc) \ + (2 * IXGBE_B2BT(_max_frame_tc) + \ + (36 * IXGBE_PCI_DELAY / 25) + 1) +#define IXGBE_LOW_DV(_max_frame_tc) \ + (2 * IXGBE_LOW_DV_X540(_max_frame_tc)) + +/* Software ATR hash keys */ +#define IXGBE_ATR_BUCKET_HASH_KEY 0x3DAD14E2 +#define IXGBE_ATR_SIGNATURE_HASH_KEY 0x174D3614 + +/* Software ATR input stream values and masks */ +#define IXGBE_ATR_HASH_MASK 0x7fff +#define IXGBE_ATR_L4TYPE_MASK 0x3 +#define IXGBE_ATR_L4TYPE_UDP 0x1 +#define IXGBE_ATR_L4TYPE_TCP 0x2 +#define IXGBE_ATR_L4TYPE_SCTP 0x3 +#define IXGBE_ATR_L4TYPE_IPV6_MASK 0x4 +enum ixgbe_atr_flow_type { + IXGBE_ATR_FLOW_TYPE_IPV4 = 0x0, + IXGBE_ATR_FLOW_TYPE_UDPV4 = 0x1, + IXGBE_ATR_FLOW_TYPE_TCPV4 = 0x2, + IXGBE_ATR_FLOW_TYPE_SCTPV4 = 0x3, + IXGBE_ATR_FLOW_TYPE_IPV6 = 0x4, + IXGBE_ATR_FLOW_TYPE_UDPV6 = 0x5, + IXGBE_ATR_FLOW_TYPE_TCPV6 = 0x6, + IXGBE_ATR_FLOW_TYPE_SCTPV6 = 0x7, +}; + +/* Flow Director ATR input struct. */ +union ixgbe_atr_input { + /* + * Byte layout in order, all values with MSB first: + * + * vm_pool - 1 byte + * flow_type - 1 byte + * vlan_id - 2 bytes + * src_ip - 16 bytes + * dst_ip - 16 bytes + * src_port - 2 bytes + * dst_port - 2 bytes + * flex_bytes - 2 bytes + * bkt_hash - 2 bytes + */ + struct { + u8 vm_pool; + u8 flow_type; + __be16 vlan_id; + __be32 dst_ip[4]; + __be32 src_ip[4]; + __be16 src_port; + __be16 dst_port; + __be16 flex_bytes; + __be16 bkt_hash; + } formatted; + __be32 dword_stream[11]; +}; + +/* Flow Director compressed ATR hash input struct */ +union ixgbe_atr_hash_dword { + struct { + u8 vm_pool; + u8 flow_type; + __be16 vlan_id; + } formatted; + __be32 ip; + struct { + __be16 src; + __be16 dst; + } port; + __be16 flex_bytes; + __be32 dword; +}; + + +/* + * Unavailable: The FCoE Boot Option ROM is not present in the flash. + * Disabled: Present; boot order is not set for any targets on the port. + * Enabled: Present; boot order is set for at least one target on the port. + */ +enum ixgbe_fcoe_boot_status { + ixgbe_fcoe_bootstatus_disabled = 0, + ixgbe_fcoe_bootstatus_enabled = 1, + ixgbe_fcoe_bootstatus_unavailable = 0xFFFF +}; + +enum ixgbe_eeprom_type { + ixgbe_eeprom_uninitialized = 0, + ixgbe_eeprom_spi, + ixgbe_flash, + ixgbe_eeprom_none /* No NVM support */ +}; + +enum ixgbe_mac_type { + ixgbe_mac_unknown = 0, + ixgbe_mac_82598EB, + ixgbe_mac_82599EB, + ixgbe_mac_X540, + ixgbe_num_macs +}; + +enum ixgbe_phy_type { + ixgbe_phy_unknown = 0, + ixgbe_phy_none, + ixgbe_phy_tn, + ixgbe_phy_aq, + ixgbe_phy_cu_unknown, + ixgbe_phy_qt, + ixgbe_phy_xaui, + ixgbe_phy_nl, + ixgbe_phy_sfp_passive_tyco, + ixgbe_phy_sfp_passive_unknown, + ixgbe_phy_sfp_active_unknown, + ixgbe_phy_sfp_avago, + ixgbe_phy_sfp_ftl, + ixgbe_phy_sfp_ftl_active, + ixgbe_phy_sfp_unknown, + ixgbe_phy_sfp_intel, + ixgbe_phy_sfp_unsupported, /*Enforce bit set with unsupported module*/ + ixgbe_phy_generic +}; + +/* + * SFP+ module type IDs: + * + * ID Module Type + * ============= + * 0 SFP_DA_CU + * 1 SFP_SR + * 2 SFP_LR + * 3 SFP_DA_CU_CORE0 - 82599-specific + * 4 SFP_DA_CU_CORE1 - 82599-specific + * 5 SFP_SR/LR_CORE0 - 82599-specific + * 6 SFP_SR/LR_CORE1 - 82599-specific + */ +enum ixgbe_sfp_type { + ixgbe_sfp_type_da_cu = 0, + ixgbe_sfp_type_sr = 1, + ixgbe_sfp_type_lr = 2, + ixgbe_sfp_type_da_cu_core0 = 3, + ixgbe_sfp_type_da_cu_core1 = 4, + ixgbe_sfp_type_srlr_core0 = 5, + ixgbe_sfp_type_srlr_core1 = 6, + ixgbe_sfp_type_da_act_lmt_core0 = 7, + ixgbe_sfp_type_da_act_lmt_core1 = 8, + ixgbe_sfp_type_1g_cu_core0 = 9, + ixgbe_sfp_type_1g_cu_core1 = 10, + ixgbe_sfp_type_1g_sx_core0 = 11, + ixgbe_sfp_type_1g_sx_core1 = 12, + ixgbe_sfp_type_not_present = 0xFFFE, + ixgbe_sfp_type_unknown = 0xFFFF +}; + +enum ixgbe_media_type { + ixgbe_media_type_unknown = 0, + ixgbe_media_type_fiber, + ixgbe_media_type_fiber_qsfp, + ixgbe_media_type_fiber_lco, + ixgbe_media_type_copper, + ixgbe_media_type_backplane, + ixgbe_media_type_cx4, + ixgbe_media_type_virtual +}; + +/* Flow Control Settings */ +enum ixgbe_fc_mode { + ixgbe_fc_none = 0, + ixgbe_fc_rx_pause, + ixgbe_fc_tx_pause, + ixgbe_fc_full, + ixgbe_fc_default +}; + +/* Smart Speed Settings */ +#define IXGBE_SMARTSPEED_MAX_RETRIES 3 +enum ixgbe_smart_speed { + ixgbe_smart_speed_auto = 0, + ixgbe_smart_speed_on, + ixgbe_smart_speed_off +}; + +/* PCI bus types */ +enum ixgbe_bus_type { + ixgbe_bus_type_unknown = 0, + ixgbe_bus_type_pci, + ixgbe_bus_type_pcix, + ixgbe_bus_type_pci_express, + ixgbe_bus_type_reserved +}; + +/* PCI bus speeds */ +enum ixgbe_bus_speed { + ixgbe_bus_speed_unknown = 0, + ixgbe_bus_speed_33 = 33, + ixgbe_bus_speed_66 = 66, + ixgbe_bus_speed_100 = 100, + ixgbe_bus_speed_120 = 120, + ixgbe_bus_speed_133 = 133, + ixgbe_bus_speed_2500 = 2500, + ixgbe_bus_speed_5000 = 5000, + ixgbe_bus_speed_8000 = 8000, + ixgbe_bus_speed_reserved +}; + +/* PCI bus widths */ +enum ixgbe_bus_width { + ixgbe_bus_width_unknown = 0, + ixgbe_bus_width_pcie_x1 = 1, + ixgbe_bus_width_pcie_x2 = 2, + ixgbe_bus_width_pcie_x4 = 4, + ixgbe_bus_width_pcie_x8 = 8, + ixgbe_bus_width_32 = 32, + ixgbe_bus_width_64 = 64, + ixgbe_bus_width_reserved +}; + +struct ixgbe_addr_filter_info { + u32 num_mc_addrs; + u32 rar_used_count; + u32 mta_in_use; + u32 overflow_promisc; + bool user_set_promisc; +}; + +/* Bus parameters */ +struct ixgbe_bus_info { + enum ixgbe_bus_speed speed; + enum ixgbe_bus_width width; + enum ixgbe_bus_type type; + + u16 func; + u16 lan_id; +}; + +/* Flow control parameters */ +struct ixgbe_fc_info { + u32 high_water[IXGBE_DCB_MAX_TRAFFIC_CLASS]; /* Flow Ctrl High-water */ + u32 low_water[IXGBE_DCB_MAX_TRAFFIC_CLASS]; /* Flow Ctrl Low-water */ + u16 pause_time; /* Flow Control Pause timer */ + bool send_xon; /* Flow control send XON */ + bool strict_ieee; /* Strict IEEE mode */ + bool disable_fc_autoneg; /* Do not autonegotiate FC */ + bool fc_was_autonegged; /* Is current_mode the result of autonegging? */ + enum ixgbe_fc_mode current_mode; /* FC mode in effect */ + enum ixgbe_fc_mode requested_mode; /* FC mode requested by caller */ +}; + +/* Statistics counters collected by the MAC */ +struct ixgbe_hw_stats { + u64 crcerrs; + u64 illerrc; + u64 errbc; + u64 mspdc; + u64 mpctotal; + u64 mpc[8]; + u64 mlfc; + u64 mrfc; + u64 rlec; + u64 lxontxc; + u64 lxonrxc; + u64 lxofftxc; + u64 lxoffrxc; + u64 pxontxc[8]; + u64 pxonrxc[8]; + u64 pxofftxc[8]; + u64 pxoffrxc[8]; + u64 prc64; + u64 prc127; + u64 prc255; + u64 prc511; + u64 prc1023; + u64 prc1522; + u64 gprc; + u64 bprc; + u64 mprc; + u64 gptc; + u64 gorc; + u64 gotc; + u64 rnbc[8]; + u64 ruc; + u64 rfc; + u64 roc; + u64 rjc; + u64 mngprc; + u64 mngpdc; + u64 mngptc; + u64 tor; + u64 tpr; + u64 tpt; + u64 ptc64; + u64 ptc127; + u64 ptc255; + u64 ptc511; + u64 ptc1023; + u64 ptc1522; + u64 mptc; + u64 bptc; + u64 xec; + u64 qprc[16]; + u64 qptc[16]; + u64 qbrc[16]; + u64 qbtc[16]; + u64 qprdc[16]; + u64 pxon2offc[8]; + u64 fdirustat_add; + u64 fdirustat_remove; + u64 fdirfstat_fadd; + u64 fdirfstat_fremove; + u64 fdirmatch; + u64 fdirmiss; + u64 fccrc; + u64 fclast; + u64 fcoerpdc; + u64 fcoeprc; + u64 fcoeptc; + u64 fcoedwrc; + u64 fcoedwtc; + u64 fcoe_noddp; + u64 fcoe_noddp_ext_buff; + u64 ldpcec; + u64 pcrc8ec; + u64 b2ospc; + u64 b2ogprc; + u64 o2bgptc; + u64 o2bspc; +}; + +/* forward declaration */ +struct ixgbe_hw; + +/* iterator type for walking multicast address lists */ +typedef u8* (*ixgbe_mc_addr_itr) (struct ixgbe_hw *hw, u8 **mc_addr_ptr, + u32 *vmdq); + +/* Function pointer table */ +struct ixgbe_eeprom_operations { + s32 (*init_params)(struct ixgbe_hw *); + s32 (*read)(struct ixgbe_hw *, u16, u16 *); + s32 (*read_buffer)(struct ixgbe_hw *, u16, u16, u16 *); + s32 (*write)(struct ixgbe_hw *, u16, u16); + s32 (*write_buffer)(struct ixgbe_hw *, u16, u16, u16 *); + s32 (*validate_checksum)(struct ixgbe_hw *, u16 *); + s32 (*update_checksum)(struct ixgbe_hw *); + u16 (*calc_checksum)(struct ixgbe_hw *); +}; + +struct ixgbe_mac_operations { + s32 (*init_hw)(struct ixgbe_hw *); + s32 (*reset_hw)(struct ixgbe_hw *); + s32 (*start_hw)(struct ixgbe_hw *); + s32 (*clear_hw_cntrs)(struct ixgbe_hw *); + enum ixgbe_media_type (*get_media_type)(struct ixgbe_hw *); + u32 (*get_supported_physical_layer)(struct ixgbe_hw *); + s32 (*get_mac_addr)(struct ixgbe_hw *, u8 *); + s32 (*get_san_mac_addr)(struct ixgbe_hw *, u8 *); + s32 (*set_san_mac_addr)(struct ixgbe_hw *, u8 *); + s32 (*get_device_caps)(struct ixgbe_hw *, u16 *); + s32 (*get_wwn_prefix)(struct ixgbe_hw *, u16 *, u16 *); + s32 (*get_fcoe_boot_status)(struct ixgbe_hw *, u16 *); + s32 (*stop_adapter)(struct ixgbe_hw *); + s32 (*get_bus_info)(struct ixgbe_hw *); + void (*set_lan_id)(struct ixgbe_hw *); + s32 (*read_analog_reg8)(struct ixgbe_hw*, u32, u8*); + s32 (*write_analog_reg8)(struct ixgbe_hw*, u32, u8); + s32 (*setup_sfp)(struct ixgbe_hw *); + s32 (*enable_rx_dma)(struct ixgbe_hw *, u32); + s32 (*disable_sec_rx_path)(struct ixgbe_hw *); + s32 (*enable_sec_rx_path)(struct ixgbe_hw *); + s32 (*acquire_swfw_sync)(struct ixgbe_hw *, u16); + void (*release_swfw_sync)(struct ixgbe_hw *, u16); + + /* Link */ + void (*disable_tx_laser)(struct ixgbe_hw *); + void (*enable_tx_laser)(struct ixgbe_hw *); + void (*flap_tx_laser)(struct ixgbe_hw *); + s32 (*setup_link)(struct ixgbe_hw *, ixgbe_link_speed, bool, bool); + s32 (*check_link)(struct ixgbe_hw *, ixgbe_link_speed *, bool *, bool); + s32 (*get_link_capabilities)(struct ixgbe_hw *, ixgbe_link_speed *, + bool *); + + /* Packet Buffer manipulation */ + void (*setup_rxpba)(struct ixgbe_hw *, int, u32, int); + + /* LED */ + s32 (*led_on)(struct ixgbe_hw *, u32); + s32 (*led_off)(struct ixgbe_hw *, u32); + s32 (*blink_led_start)(struct ixgbe_hw *, u32); + s32 (*blink_led_stop)(struct ixgbe_hw *, u32); + + /* RAR, Multicast, VLAN */ + s32 (*set_rar)(struct ixgbe_hw *, u32, u8 *, u32, u32); + s32 (*set_uc_addr)(struct ixgbe_hw *, u32, u8 *); + s32 (*clear_rar)(struct ixgbe_hw *, u32); + s32 (*insert_mac_addr)(struct ixgbe_hw *, u8 *, u32); + s32 (*set_vmdq)(struct ixgbe_hw *, u32, u32); + s32 (*set_vmdq_san_mac)(struct ixgbe_hw *, u32); + s32 (*clear_vmdq)(struct ixgbe_hw *, u32, u32); + s32 (*init_rx_addrs)(struct ixgbe_hw *); + s32 (*update_uc_addr_list)(struct ixgbe_hw *, u8 *, u32, + ixgbe_mc_addr_itr); + s32 (*update_mc_addr_list)(struct ixgbe_hw *, u8 *, u32, + ixgbe_mc_addr_itr, bool clear); + s32 (*enable_mc)(struct ixgbe_hw *); + s32 (*disable_mc)(struct ixgbe_hw *); + s32 (*clear_vfta)(struct ixgbe_hw *); + s32 (*set_vfta)(struct ixgbe_hw *, u32, u32, bool); + s32 (*set_vlvf)(struct ixgbe_hw *, u32, u32, bool, bool *); + s32 (*init_uta_tables)(struct ixgbe_hw *); + void (*set_mac_anti_spoofing)(struct ixgbe_hw *, bool, int); + void (*set_vlan_anti_spoofing)(struct ixgbe_hw *, bool, int); + + /* Flow Control */ + s32 (*fc_enable)(struct ixgbe_hw *); + + /* Manageability interface */ + s32 (*set_fw_drv_ver)(struct ixgbe_hw *, u8, u8, u8, u8); + s32 (*get_thermal_sensor_data)(struct ixgbe_hw *); + s32 (*init_thermal_sensor_thresh)(struct ixgbe_hw *hw); +}; + +struct ixgbe_phy_operations { + s32 (*identify)(struct ixgbe_hw *); + s32 (*identify_sfp)(struct ixgbe_hw *); + s32 (*init)(struct ixgbe_hw *); + s32 (*reset)(struct ixgbe_hw *); + s32 (*read_reg)(struct ixgbe_hw *, u32, u32, u16 *); + s32 (*write_reg)(struct ixgbe_hw *, u32, u32, u16); + s32 (*setup_link)(struct ixgbe_hw *); + s32 (*setup_link_speed)(struct ixgbe_hw *, ixgbe_link_speed, bool, + bool); + s32 (*check_link)(struct ixgbe_hw *, ixgbe_link_speed *, bool *); + s32 (*get_firmware_version)(struct ixgbe_hw *, u16 *); + s32 (*read_i2c_byte)(struct ixgbe_hw *, u8, u8, u8 *); + s32 (*write_i2c_byte)(struct ixgbe_hw *, u8, u8, u8); + s32 (*read_i2c_eeprom)(struct ixgbe_hw *, u8 , u8 *); + s32 (*write_i2c_eeprom)(struct ixgbe_hw *, u8, u8); + void (*i2c_bus_clear)(struct ixgbe_hw *); + s32 (*check_overtemp)(struct ixgbe_hw *); +}; + +struct ixgbe_eeprom_info { + struct ixgbe_eeprom_operations ops; + enum ixgbe_eeprom_type type; + u32 semaphore_delay; + u16 word_size; + u16 address_bits; + u16 word_page_size; +}; + +#define IXGBE_FLAGS_DOUBLE_RESET_REQUIRED 0x01 +struct ixgbe_mac_info { + struct ixgbe_mac_operations ops; + enum ixgbe_mac_type type; + u8 addr[IXGBE_ETH_LENGTH_OF_ADDRESS]; + u8 perm_addr[IXGBE_ETH_LENGTH_OF_ADDRESS]; + u8 san_addr[IXGBE_ETH_LENGTH_OF_ADDRESS]; + /* prefix for World Wide Node Name (WWNN) */ + u16 wwnn_prefix; + /* prefix for World Wide Port Name (WWPN) */ + u16 wwpn_prefix; +#define IXGBE_MAX_MTA 128 + u32 mta_shadow[IXGBE_MAX_MTA]; + s32 mc_filter_type; + u32 mcft_size; + u32 vft_size; + u32 num_rar_entries; + u32 rar_highwater; + u32 rx_pb_size; + u32 max_tx_queues; + u32 max_rx_queues; + u32 orig_autoc; + u8 san_mac_rar_index; + u32 orig_autoc2; + u16 max_msix_vectors; + bool arc_subsystem_valid; + bool orig_link_settings_stored; + bool autotry_restart; + u8 flags; + struct ixgbe_thermal_sensor_data thermal_sensor_data; +}; + +struct ixgbe_phy_info { + struct ixgbe_phy_operations ops; + enum ixgbe_phy_type type; + u32 addr; + u32 id; + enum ixgbe_sfp_type sfp_type; + bool sfp_setup_needed; + u32 revision; + enum ixgbe_media_type media_type; + bool reset_disable; + ixgbe_autoneg_advertised autoneg_advertised; + enum ixgbe_smart_speed smart_speed; + bool smart_speed_active; + bool multispeed_fiber; + bool reset_if_overtemp; + bool qsfp_shared_i2c_bus; +}; + +#include "ixgbe_mbx.h" + +struct ixgbe_mbx_operations { + void (*init_params)(struct ixgbe_hw *hw); + s32 (*read)(struct ixgbe_hw *, u32 *, u16, u16); + s32 (*write)(struct ixgbe_hw *, u32 *, u16, u16); + s32 (*read_posted)(struct ixgbe_hw *, u32 *, u16, u16); + s32 (*write_posted)(struct ixgbe_hw *, u32 *, u16, u16); + s32 (*check_for_msg)(struct ixgbe_hw *, u16); + s32 (*check_for_ack)(struct ixgbe_hw *, u16); + s32 (*check_for_rst)(struct ixgbe_hw *, u16); +}; + +struct ixgbe_mbx_stats { + u32 msgs_tx; + u32 msgs_rx; + + u32 acks; + u32 reqs; + u32 rsts; +}; + +struct ixgbe_mbx_info { + struct ixgbe_mbx_operations ops; + struct ixgbe_mbx_stats stats; + u32 timeout; + u32 udelay; + u32 v2p_mailbox; + u16 size; +}; + +struct ixgbe_hw { + u8 __iomem *hw_addr; + void *back; + struct ixgbe_mac_info mac; + struct ixgbe_addr_filter_info addr_ctrl; + struct ixgbe_fc_info fc; + struct ixgbe_phy_info phy; + struct ixgbe_eeprom_info eeprom; + struct ixgbe_bus_info bus; + struct ixgbe_mbx_info mbx; + u16 device_id; + u16 vendor_id; + u16 subsystem_device_id; + u16 subsystem_vendor_id; + u8 revision_id; + bool adapter_stopped; + bool force_full_reset; + bool allow_unsupported_sfp; +}; + +#define ixgbe_call_func(hw, func, params, error) \ + (func != NULL) ? func params : error + + +/* Error Codes */ +#define IXGBE_ERR_EEPROM -1 +#define IXGBE_ERR_EEPROM_CHECKSUM -2 +#define IXGBE_ERR_PHY -3 +#define IXGBE_ERR_CONFIG -4 +#define IXGBE_ERR_PARAM -5 +#define IXGBE_ERR_MAC_TYPE -6 +#define IXGBE_ERR_UNKNOWN_PHY -7 +#define IXGBE_ERR_LINK_SETUP -8 +#define IXGBE_ERR_ADAPTER_STOPPED -9 +#define IXGBE_ERR_INVALID_MAC_ADDR -10 +#define IXGBE_ERR_DEVICE_NOT_SUPPORTED -11 +#define IXGBE_ERR_MASTER_REQUESTS_PENDING -12 +#define IXGBE_ERR_INVALID_LINK_SETTINGS -13 +#define IXGBE_ERR_AUTONEG_NOT_COMPLETE -14 +#define IXGBE_ERR_RESET_FAILED -15 +#define IXGBE_ERR_SWFW_SYNC -16 +#define IXGBE_ERR_PHY_ADDR_INVALID -17 +#define IXGBE_ERR_I2C -18 +#define IXGBE_ERR_SFP_NOT_SUPPORTED -19 +#define IXGBE_ERR_SFP_NOT_PRESENT -20 +#define IXGBE_ERR_SFP_NO_INIT_SEQ_PRESENT -21 +#define IXGBE_ERR_NO_SAN_ADDR_PTR -22 +#define IXGBE_ERR_FDIR_REINIT_FAILED -23 +#define IXGBE_ERR_EEPROM_VERSION -24 +#define IXGBE_ERR_NO_SPACE -25 +#define IXGBE_ERR_OVERTEMP -26 +#define IXGBE_ERR_FC_NOT_NEGOTIATED -27 +#define IXGBE_ERR_FC_NOT_SUPPORTED -28 +#define IXGBE_ERR_SFP_SETUP_NOT_COMPLETE -30 +#define IXGBE_ERR_PBA_SECTION -31 +#define IXGBE_ERR_INVALID_ARGUMENT -32 +#define IXGBE_ERR_HOST_INTERFACE_COMMAND -33 +#define IXGBE_ERR_OUT_OF_MEM -34 + +#define IXGBE_NOT_IMPLEMENTED 0x7FFFFFFF + +#define UNREFERENCED_XPARAMETER + +#endif /* _IXGBE_TYPE_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c new file mode 100644 index 00000000..b99d9e84 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.c @@ -0,0 +1,937 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#include "ixgbe_x540.h" +#include "ixgbe_type.h" +#include "ixgbe_api.h" +#include "ixgbe_common.h" +#include "ixgbe_phy.h" + +static s32 ixgbe_update_flash_X540(struct ixgbe_hw *hw); +static s32 ixgbe_poll_flash_update_done_X540(struct ixgbe_hw *hw); +static s32 ixgbe_get_swfw_sync_semaphore(struct ixgbe_hw *hw); +static void ixgbe_release_swfw_sync_semaphore(struct ixgbe_hw *hw); + +/** + * ixgbe_init_ops_X540 - Inits func ptrs and MAC type + * @hw: pointer to hardware structure + * + * Initialize the function pointers and assign the MAC type for X540. + * Does not touch the hardware. + **/ +s32 ixgbe_init_ops_X540(struct ixgbe_hw *hw) +{ + struct ixgbe_mac_info *mac = &hw->mac; + struct ixgbe_phy_info *phy = &hw->phy; + struct ixgbe_eeprom_info *eeprom = &hw->eeprom; + s32 ret_val; + + ret_val = ixgbe_init_phy_ops_generic(hw); + ret_val = ixgbe_init_ops_generic(hw); + + + /* EEPROM */ + eeprom->ops.init_params = &ixgbe_init_eeprom_params_X540; + eeprom->ops.read = &ixgbe_read_eerd_X540; + eeprom->ops.read_buffer = &ixgbe_read_eerd_buffer_X540; + eeprom->ops.write = &ixgbe_write_eewr_X540; + eeprom->ops.write_buffer = &ixgbe_write_eewr_buffer_X540; + eeprom->ops.update_checksum = &ixgbe_update_eeprom_checksum_X540; + eeprom->ops.validate_checksum = &ixgbe_validate_eeprom_checksum_X540; + eeprom->ops.calc_checksum = &ixgbe_calc_eeprom_checksum_X540; + + /* PHY */ + phy->ops.init = &ixgbe_init_phy_ops_generic; + phy->ops.reset = NULL; + + /* MAC */ + mac->ops.reset_hw = &ixgbe_reset_hw_X540; + mac->ops.get_media_type = &ixgbe_get_media_type_X540; + mac->ops.get_supported_physical_layer = + &ixgbe_get_supported_physical_layer_X540; + mac->ops.read_analog_reg8 = NULL; + mac->ops.write_analog_reg8 = NULL; + mac->ops.start_hw = &ixgbe_start_hw_X540; + mac->ops.get_san_mac_addr = &ixgbe_get_san_mac_addr_generic; + mac->ops.set_san_mac_addr = &ixgbe_set_san_mac_addr_generic; + mac->ops.get_device_caps = &ixgbe_get_device_caps_generic; + mac->ops.get_wwn_prefix = &ixgbe_get_wwn_prefix_generic; + mac->ops.get_fcoe_boot_status = &ixgbe_get_fcoe_boot_status_generic; + mac->ops.acquire_swfw_sync = &ixgbe_acquire_swfw_sync_X540; + mac->ops.release_swfw_sync = &ixgbe_release_swfw_sync_X540; + mac->ops.disable_sec_rx_path = &ixgbe_disable_sec_rx_path_generic; + mac->ops.enable_sec_rx_path = &ixgbe_enable_sec_rx_path_generic; + + /* RAR, Multicast, VLAN */ + mac->ops.set_vmdq = &ixgbe_set_vmdq_generic; + mac->ops.set_vmdq_san_mac = &ixgbe_set_vmdq_san_mac_generic; + mac->ops.clear_vmdq = &ixgbe_clear_vmdq_generic; + mac->ops.insert_mac_addr = &ixgbe_insert_mac_addr_generic; + mac->rar_highwater = 1; + mac->ops.set_vfta = &ixgbe_set_vfta_generic; + mac->ops.set_vlvf = &ixgbe_set_vlvf_generic; + mac->ops.clear_vfta = &ixgbe_clear_vfta_generic; + mac->ops.init_uta_tables = &ixgbe_init_uta_tables_generic; + mac->ops.set_mac_anti_spoofing = &ixgbe_set_mac_anti_spoofing; + mac->ops.set_vlan_anti_spoofing = &ixgbe_set_vlan_anti_spoofing; + + /* Link */ + mac->ops.get_link_capabilities = + &ixgbe_get_copper_link_capabilities_generic; + mac->ops.setup_link = &ixgbe_setup_mac_link_X540; + mac->ops.setup_rxpba = &ixgbe_set_rxpba_generic; + mac->ops.check_link = &ixgbe_check_mac_link_generic; + + mac->mcft_size = 128; + mac->vft_size = 128; + mac->num_rar_entries = 128; + mac->rx_pb_size = 384; + mac->max_tx_queues = 128; + mac->max_rx_queues = 128; + mac->max_msix_vectors = ixgbe_get_pcie_msix_count_generic(hw); + + /* + * FWSM register + * ARC supported; valid only if manageability features are + * enabled. + */ + mac->arc_subsystem_valid = (IXGBE_READ_REG(hw, IXGBE_FWSM) & + IXGBE_FWSM_MODE_MASK) ? true : false; + + //hw->mbx.ops.init_params = ixgbe_init_mbx_params_pf; + + /* LEDs */ + mac->ops.blink_led_start = ixgbe_blink_led_start_X540; + mac->ops.blink_led_stop = ixgbe_blink_led_stop_X540; + + /* Manageability interface */ + mac->ops.set_fw_drv_ver = &ixgbe_set_fw_drv_ver_generic; + + return ret_val; +} + +/** + * ixgbe_get_link_capabilities_X540 - Determines link capabilities + * @hw: pointer to hardware structure + * @speed: pointer to link speed + * @autoneg: true when autoneg or autotry is enabled + * + * Determines the link capabilities by reading the AUTOC register. + **/ +s32 ixgbe_get_link_capabilities_X540(struct ixgbe_hw *hw, + ixgbe_link_speed *speed, + bool *autoneg) +{ + ixgbe_get_copper_link_capabilities_generic(hw, speed, autoneg); + + return 0; +} + +/** + * ixgbe_get_media_type_X540 - Get media type + * @hw: pointer to hardware structure + * + * Returns the media type (fiber, copper, backplane) + **/ +enum ixgbe_media_type ixgbe_get_media_type_X540(struct ixgbe_hw *hw) +{ + return ixgbe_media_type_copper; +} + +/** + * ixgbe_setup_mac_link_X540 - Sets the auto advertised capabilities + * @hw: pointer to hardware structure + * @speed: new link speed + * @autoneg: true if autonegotiation enabled + * @autoneg_wait_to_complete: true when waiting for completion is needed + **/ +s32 ixgbe_setup_mac_link_X540(struct ixgbe_hw *hw, + ixgbe_link_speed speed, bool autoneg, + bool autoneg_wait_to_complete) +{ + return hw->phy.ops.setup_link_speed(hw, speed, autoneg, + autoneg_wait_to_complete); +} + +/** + * ixgbe_reset_hw_X540 - Perform hardware reset + * @hw: pointer to hardware structure + * + * Resets the hardware by resetting the transmit and receive units, masks + * and clears all interrupts, and perform a reset. + **/ +s32 ixgbe_reset_hw_X540(struct ixgbe_hw *hw) +{ + s32 status = 0; + + /* + * Userland DPDK takes the ownershiop of device + * Kernel driver here used as the simple path for ethtool only + * Won't real reset device anyway + */ +#if 0 + u32 ctrl, i; + + /* Call adapter stop to disable tx/rx and clear interrupts */ + status = hw->mac.ops.stop_adapter(hw); + if (status != 0) + goto reset_hw_out; + + /* flush pending Tx transactions */ + ixgbe_clear_tx_pending(hw); + +mac_reset_top: + ctrl = IXGBE_CTRL_RST; + ctrl |= IXGBE_READ_REG(hw, IXGBE_CTRL); + IXGBE_WRITE_REG(hw, IXGBE_CTRL, ctrl); + IXGBE_WRITE_FLUSH(hw); + + /* Poll for reset bit to self-clear indicating reset is complete */ + for (i = 0; i < 10; i++) { + udelay(1); + ctrl = IXGBE_READ_REG(hw, IXGBE_CTRL); + if (!(ctrl & IXGBE_CTRL_RST_MASK)) + break; + } + + if (ctrl & IXGBE_CTRL_RST_MASK) { + status = IXGBE_ERR_RESET_FAILED; + hw_dbg(hw, "Reset polling failed to complete.\n"); + } + msleep(100); + + /* + * Double resets are required for recovery from certain error + * conditions. Between resets, it is necessary to stall to allow time + * for any pending HW events to complete. + */ + if (hw->mac.flags & IXGBE_FLAGS_DOUBLE_RESET_REQUIRED) { + hw->mac.flags &= ~IXGBE_FLAGS_DOUBLE_RESET_REQUIRED; + goto mac_reset_top; + } + + /* Set the Rx packet buffer size. */ + IXGBE_WRITE_REG(hw, IXGBE_RXPBSIZE(0), 384 << IXGBE_RXPBSIZE_SHIFT); + +#endif + + /* Store the permanent mac address */ + hw->mac.ops.get_mac_addr(hw, hw->mac.perm_addr); + + /* + * Store MAC address from RAR0, clear receive address registers, and + * clear the multicast table. Also reset num_rar_entries to 128, + * since we modify this value when programming the SAN MAC address. + */ + hw->mac.num_rar_entries = 128; + hw->mac.ops.init_rx_addrs(hw); + + /* Store the permanent SAN mac address */ + hw->mac.ops.get_san_mac_addr(hw, hw->mac.san_addr); + + /* Add the SAN MAC address to the RAR only if it's a valid address */ + if (ixgbe_validate_mac_addr(hw->mac.san_addr) == 0) { + hw->mac.ops.set_rar(hw, hw->mac.num_rar_entries - 1, + hw->mac.san_addr, 0, IXGBE_RAH_AV); + + /* Save the SAN MAC RAR index */ + hw->mac.san_mac_rar_index = hw->mac.num_rar_entries - 1; + + /* Reserve the last RAR for the SAN MAC address */ + hw->mac.num_rar_entries--; + } + + /* Store the alternative WWNN/WWPN prefix */ + hw->mac.ops.get_wwn_prefix(hw, &hw->mac.wwnn_prefix, + &hw->mac.wwpn_prefix); + +//reset_hw_out: + return status; +} + +/** + * ixgbe_start_hw_X540 - Prepare hardware for Tx/Rx + * @hw: pointer to hardware structure + * + * Starts the hardware using the generic start_hw function + * and the generation start_hw function. + * Then performs revision-specific operations, if any. + **/ +s32 ixgbe_start_hw_X540(struct ixgbe_hw *hw) +{ + s32 ret_val = 0; + + ret_val = ixgbe_start_hw_generic(hw); + if (ret_val != 0) + goto out; + + ret_val = ixgbe_start_hw_gen2(hw); + +out: + return ret_val; +} + +/** + * ixgbe_get_supported_physical_layer_X540 - Returns physical layer type + * @hw: pointer to hardware structure + * + * Determines physical layer capabilities of the current configuration. + **/ +u32 ixgbe_get_supported_physical_layer_X540(struct ixgbe_hw *hw) +{ + u32 physical_layer = IXGBE_PHYSICAL_LAYER_UNKNOWN; + u16 ext_ability = 0; + + hw->phy.ops.read_reg(hw, IXGBE_MDIO_PHY_EXT_ABILITY, + IXGBE_MDIO_PMA_PMD_DEV_TYPE, &ext_ability); + if (ext_ability & IXGBE_MDIO_PHY_10GBASET_ABILITY) + physical_layer |= IXGBE_PHYSICAL_LAYER_10GBASE_T; + if (ext_ability & IXGBE_MDIO_PHY_1000BASET_ABILITY) + physical_layer |= IXGBE_PHYSICAL_LAYER_1000BASE_T; + if (ext_ability & IXGBE_MDIO_PHY_100BASETX_ABILITY) + physical_layer |= IXGBE_PHYSICAL_LAYER_100BASE_TX; + + return physical_layer; +} + +/** + * ixgbe_init_eeprom_params_X540 - Initialize EEPROM params + * @hw: pointer to hardware structure + * + * Initializes the EEPROM parameters ixgbe_eeprom_info within the + * ixgbe_hw struct in order to set up EEPROM access. + **/ +s32 ixgbe_init_eeprom_params_X540(struct ixgbe_hw *hw) +{ + struct ixgbe_eeprom_info *eeprom = &hw->eeprom; + u32 eec; + u16 eeprom_size; + + if (eeprom->type == ixgbe_eeprom_uninitialized) { + eeprom->semaphore_delay = 10; + eeprom->type = ixgbe_flash; + + eec = IXGBE_READ_REG(hw, IXGBE_EEC); + eeprom_size = (u16)((eec & IXGBE_EEC_SIZE) >> + IXGBE_EEC_SIZE_SHIFT); + eeprom->word_size = 1 << (eeprom_size + + IXGBE_EEPROM_WORD_SIZE_SHIFT); + + hw_dbg(hw, "Eeprom params: type = %d, size = %d\n", + eeprom->type, eeprom->word_size); + } + + return 0; +} + +/** + * ixgbe_read_eerd_X540- Read EEPROM word using EERD + * @hw: pointer to hardware structure + * @offset: offset of word in the EEPROM to read + * @data: word read from the EEPROM + * + * Reads a 16 bit word from the EEPROM using the EERD register. + **/ +s32 ixgbe_read_eerd_X540(struct ixgbe_hw *hw, u16 offset, u16 *data) +{ + s32 status = 0; + + if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) == + 0) + status = ixgbe_read_eerd_generic(hw, offset, data); + else + status = IXGBE_ERR_SWFW_SYNC; + + hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM); + return status; +} + +/** + * ixgbe_read_eerd_buffer_X540- Read EEPROM word(s) using EERD + * @hw: pointer to hardware structure + * @offset: offset of word in the EEPROM to read + * @words: number of words + * @data: word(s) read from the EEPROM + * + * Reads a 16 bit word(s) from the EEPROM using the EERD register. + **/ +s32 ixgbe_read_eerd_buffer_X540(struct ixgbe_hw *hw, + u16 offset, u16 words, u16 *data) +{ + s32 status = 0; + + if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) == + 0) + status = ixgbe_read_eerd_buffer_generic(hw, offset, + words, data); + else + status = IXGBE_ERR_SWFW_SYNC; + + hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM); + return status; +} + +/** + * ixgbe_write_eewr_X540 - Write EEPROM word using EEWR + * @hw: pointer to hardware structure + * @offset: offset of word in the EEPROM to write + * @data: word write to the EEPROM + * + * Write a 16 bit word to the EEPROM using the EEWR register. + **/ +s32 ixgbe_write_eewr_X540(struct ixgbe_hw *hw, u16 offset, u16 data) +{ + s32 status = 0; + + if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) == + 0) + status = ixgbe_write_eewr_generic(hw, offset, data); + else + status = IXGBE_ERR_SWFW_SYNC; + + hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM); + return status; +} + +/** + * ixgbe_write_eewr_buffer_X540 - Write EEPROM word(s) using EEWR + * @hw: pointer to hardware structure + * @offset: offset of word in the EEPROM to write + * @words: number of words + * @data: word(s) write to the EEPROM + * + * Write a 16 bit word(s) to the EEPROM using the EEWR register. + **/ +s32 ixgbe_write_eewr_buffer_X540(struct ixgbe_hw *hw, + u16 offset, u16 words, u16 *data) +{ + s32 status = 0; + + if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) == + 0) + status = ixgbe_write_eewr_buffer_generic(hw, offset, + words, data); + else + status = IXGBE_ERR_SWFW_SYNC; + + hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM); + return status; +} + +/** + * ixgbe_calc_eeprom_checksum_X540 - Calculates and returns the checksum + * + * This function does not use synchronization for EERD and EEWR. It can + * be used internally by function which utilize ixgbe_acquire_swfw_sync_X540. + * + * @hw: pointer to hardware structure + **/ +u16 ixgbe_calc_eeprom_checksum_X540(struct ixgbe_hw *hw) +{ + u16 i; + u16 j; + u16 checksum = 0; + u16 length = 0; + u16 pointer = 0; + u16 word = 0; + + /* + * Do not use hw->eeprom.ops.read because we do not want to take + * the synchronization semaphores here. Instead use + * ixgbe_read_eerd_generic + */ + + /* Include 0x0-0x3F in the checksum */ + for (i = 0; i < IXGBE_EEPROM_CHECKSUM; i++) { + if (ixgbe_read_eerd_generic(hw, i, &word) != 0) { + hw_dbg(hw, "EEPROM read failed\n"); + break; + } + checksum += word; + } + + /* + * Include all data from pointers 0x3, 0x6-0xE. This excludes the + * FW, PHY module, and PCIe Expansion/Option ROM pointers. + */ + for (i = IXGBE_PCIE_ANALOG_PTR; i < IXGBE_FW_PTR; i++) { + if (i == IXGBE_PHY_PTR || i == IXGBE_OPTION_ROM_PTR) + continue; + + if (ixgbe_read_eerd_generic(hw, i, &pointer) != 0) { + hw_dbg(hw, "EEPROM read failed\n"); + break; + } + + /* Skip pointer section if the pointer is invalid. */ + if (pointer == 0xFFFF || pointer == 0 || + pointer >= hw->eeprom.word_size) + continue; + + if (ixgbe_read_eerd_generic(hw, pointer, &length) != + 0) { + hw_dbg(hw, "EEPROM read failed\n"); + break; + } + + /* Skip pointer section if length is invalid. */ + if (length == 0xFFFF || length == 0 || + (pointer + length) >= hw->eeprom.word_size) + continue; + + for (j = pointer+1; j <= pointer+length; j++) { + if (ixgbe_read_eerd_generic(hw, j, &word) != + 0) { + hw_dbg(hw, "EEPROM read failed\n"); + break; + } + checksum += word; + } + } + + checksum = (u16)IXGBE_EEPROM_SUM - checksum; + + return checksum; +} + +/** + * ixgbe_validate_eeprom_checksum_X540 - Validate EEPROM checksum + * @hw: pointer to hardware structure + * @checksum_val: calculated checksum + * + * Performs checksum calculation and validates the EEPROM checksum. If the + * caller does not need checksum_val, the value can be NULL. + **/ +s32 ixgbe_validate_eeprom_checksum_X540(struct ixgbe_hw *hw, + u16 *checksum_val) +{ + s32 status; + u16 checksum; + u16 read_checksum = 0; + + /* + * Read the first word from the EEPROM. If this times out or fails, do + * not continue or we could be in for a very long wait while every + * EEPROM read fails + */ + status = hw->eeprom.ops.read(hw, 0, &checksum); + + if (status != 0) { + hw_dbg(hw, "EEPROM read failed\n"); + goto out; + } + + if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) == + 0) { + checksum = hw->eeprom.ops.calc_checksum(hw); + + /* + * Do not use hw->eeprom.ops.read because we do not want to take + * the synchronization semaphores twice here. + */ + ixgbe_read_eerd_generic(hw, IXGBE_EEPROM_CHECKSUM, + &read_checksum); + + /* + * Verify read checksum from EEPROM is the same as + * calculated checksum + */ + if (read_checksum != checksum) + status = IXGBE_ERR_EEPROM_CHECKSUM; + + /* If the user cares, return the calculated checksum */ + if (checksum_val) + *checksum_val = checksum; + } else { + status = IXGBE_ERR_SWFW_SYNC; + } + + hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM); +out: + return status; +} + +/** + * ixgbe_update_eeprom_checksum_X540 - Updates the EEPROM checksum and flash + * @hw: pointer to hardware structure + * + * After writing EEPROM to shadow RAM using EEWR register, software calculates + * checksum and updates the EEPROM and instructs the hardware to update + * the flash. + **/ +s32 ixgbe_update_eeprom_checksum_X540(struct ixgbe_hw *hw) +{ + s32 status; + u16 checksum; + + /* + * Read the first word from the EEPROM. If this times out or fails, do + * not continue or we could be in for a very long wait while every + * EEPROM read fails + */ + status = hw->eeprom.ops.read(hw, 0, &checksum); + + if (status != 0) + hw_dbg(hw, "EEPROM read failed\n"); + + if (hw->mac.ops.acquire_swfw_sync(hw, IXGBE_GSSR_EEP_SM) == + 0) { + checksum = hw->eeprom.ops.calc_checksum(hw); + + /* + * Do not use hw->eeprom.ops.write because we do not want to + * take the synchronization semaphores twice here. + */ + status = ixgbe_write_eewr_generic(hw, IXGBE_EEPROM_CHECKSUM, + checksum); + + if (status == 0) + status = ixgbe_update_flash_X540(hw); + else + status = IXGBE_ERR_SWFW_SYNC; + } + + hw->mac.ops.release_swfw_sync(hw, IXGBE_GSSR_EEP_SM); + + return status; +} + +/** + * ixgbe_update_flash_X540 - Instruct HW to copy EEPROM to Flash device + * @hw: pointer to hardware structure + * + * Set FLUP (bit 23) of the EEC register to instruct Hardware to copy + * EEPROM from shadow RAM to the flash device. + **/ +static s32 ixgbe_update_flash_X540(struct ixgbe_hw *hw) +{ + u32 flup; + s32 status = IXGBE_ERR_EEPROM; + + status = ixgbe_poll_flash_update_done_X540(hw); + if (status == IXGBE_ERR_EEPROM) { + hw_dbg(hw, "Flash update time out\n"); + goto out; + } + + flup = IXGBE_READ_REG(hw, IXGBE_EEC) | IXGBE_EEC_FLUP; + IXGBE_WRITE_REG(hw, IXGBE_EEC, flup); + + status = ixgbe_poll_flash_update_done_X540(hw); + if (status == 0) + hw_dbg(hw, "Flash update complete\n"); + else + hw_dbg(hw, "Flash update time out\n"); + + if (hw->revision_id == 0) { + flup = IXGBE_READ_REG(hw, IXGBE_EEC); + + if (flup & IXGBE_EEC_SEC1VAL) { + flup |= IXGBE_EEC_FLUP; + IXGBE_WRITE_REG(hw, IXGBE_EEC, flup); + } + + status = ixgbe_poll_flash_update_done_X540(hw); + if (status == 0) + hw_dbg(hw, "Flash update complete\n"); + else + hw_dbg(hw, "Flash update time out\n"); + } +out: + return status; +} + +/** + * ixgbe_poll_flash_update_done_X540 - Poll flash update status + * @hw: pointer to hardware structure + * + * Polls the FLUDONE (bit 26) of the EEC Register to determine when the + * flash update is done. + **/ +static s32 ixgbe_poll_flash_update_done_X540(struct ixgbe_hw *hw) +{ + u32 i; + u32 reg; + s32 status = IXGBE_ERR_EEPROM; + + for (i = 0; i < IXGBE_FLUDONE_ATTEMPTS; i++) { + reg = IXGBE_READ_REG(hw, IXGBE_EEC); + if (reg & IXGBE_EEC_FLUDONE) { + status = 0; + break; + } + udelay(5); + } + return status; +} + +/** + * ixgbe_acquire_swfw_sync_X540 - Acquire SWFW semaphore + * @hw: pointer to hardware structure + * @mask: Mask to specify which semaphore to acquire + * + * Acquires the SWFW semaphore thought the SW_FW_SYNC register for + * the specified function (CSR, PHY0, PHY1, NVM, Flash) + **/ +s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u16 mask) +{ + u32 swfw_sync; + u32 swmask = mask; + u32 fwmask = mask << 5; + u32 hwmask = 0; + u32 timeout = 200; + u32 i; + s32 ret_val = 0; + + if (swmask == IXGBE_GSSR_EEP_SM) + hwmask = IXGBE_GSSR_FLASH_SM; + + /* SW only mask doesn't have FW bit pair */ + if (swmask == IXGBE_GSSR_SW_MNG_SM) + fwmask = 0; + + for (i = 0; i < timeout; i++) { + /* + * SW NVM semaphore bit is used for access to all + * SW_FW_SYNC bits (not just NVM) + */ + if (ixgbe_get_swfw_sync_semaphore(hw)) { + ret_val = IXGBE_ERR_SWFW_SYNC; + goto out; + } + + swfw_sync = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC); + if (!(swfw_sync & (fwmask | swmask | hwmask))) { + swfw_sync |= swmask; + IXGBE_WRITE_REG(hw, IXGBE_SWFW_SYNC, swfw_sync); + ixgbe_release_swfw_sync_semaphore(hw); + msleep(5); + goto out; + } else { + /* + * Firmware currently using resource (fwmask), hardware + * currently using resource (hwmask), or other software + * thread currently using resource (swmask) + */ + ixgbe_release_swfw_sync_semaphore(hw); + msleep(5); + } + } + + /* Failed to get SW only semaphore */ + if (swmask == IXGBE_GSSR_SW_MNG_SM) { + ret_val = IXGBE_ERR_SWFW_SYNC; + goto out; + } + + /* If the resource is not released by the FW/HW the SW can assume that + * the FW/HW malfunctions. In that case the SW should sets the SW bit(s) + * of the requested resource(s) while ignoring the corresponding FW/HW + * bits in the SW_FW_SYNC register. + */ + swfw_sync = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC); + if (swfw_sync & (fwmask | hwmask)) { + if (ixgbe_get_swfw_sync_semaphore(hw)) { + ret_val = IXGBE_ERR_SWFW_SYNC; + goto out; + } + + swfw_sync |= swmask; + IXGBE_WRITE_REG(hw, IXGBE_SWFW_SYNC, swfw_sync); + ixgbe_release_swfw_sync_semaphore(hw); + msleep(5); + } + +out: + return ret_val; +} + +/** + * ixgbe_release_swfw_sync_X540 - Release SWFW semaphore + * @hw: pointer to hardware structure + * @mask: Mask to specify which semaphore to release + * + * Releases the SWFW semaphore through the SW_FW_SYNC register + * for the specified function (CSR, PHY0, PHY1, EVM, Flash) + **/ +void ixgbe_release_swfw_sync_X540(struct ixgbe_hw *hw, u16 mask) +{ + u32 swfw_sync; + u32 swmask = mask; + + ixgbe_get_swfw_sync_semaphore(hw); + + swfw_sync = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC); + swfw_sync &= ~swmask; + IXGBE_WRITE_REG(hw, IXGBE_SWFW_SYNC, swfw_sync); + + ixgbe_release_swfw_sync_semaphore(hw); + msleep(5); +} + +/** + * ixgbe_get_nvm_semaphore - Get hardware semaphore + * @hw: pointer to hardware structure + * + * Sets the hardware semaphores so SW/FW can gain control of shared resources + **/ +static s32 ixgbe_get_swfw_sync_semaphore(struct ixgbe_hw *hw) +{ + s32 status = IXGBE_ERR_EEPROM; + u32 timeout = 2000; + u32 i; + u32 swsm; + + /* Get SMBI software semaphore between device drivers first */ + for (i = 0; i < timeout; i++) { + /* + * If the SMBI bit is 0 when we read it, then the bit will be + * set and we have the semaphore + */ + swsm = IXGBE_READ_REG(hw, IXGBE_SWSM); + if (!(swsm & IXGBE_SWSM_SMBI)) { + status = 0; + break; + } + udelay(50); + } + + /* Now get the semaphore between SW/FW through the REGSMP bit */ + if (status == 0) { + for (i = 0; i < timeout; i++) { + swsm = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC); + if (!(swsm & IXGBE_SWFW_REGSMP)) + break; + + udelay(50); + } + + /* + * Release semaphores and return error if SW NVM semaphore + * was not granted because we don't have access to the EEPROM + */ + if (i >= timeout) { + hw_dbg(hw, "REGSMP Software NVM semaphore not " + "granted.\n"); + ixgbe_release_swfw_sync_semaphore(hw); + status = IXGBE_ERR_EEPROM; + } + } else { + hw_dbg(hw, "Software semaphore SMBI between device drivers " + "not granted.\n"); + } + + return status; +} + +/** + * ixgbe_release_nvm_semaphore - Release hardware semaphore + * @hw: pointer to hardware structure + * + * This function clears hardware semaphore bits. + **/ +static void ixgbe_release_swfw_sync_semaphore(struct ixgbe_hw *hw) +{ + u32 swsm; + + /* Release both semaphores by writing 0 to the bits REGSMP and SMBI */ + + swsm = IXGBE_READ_REG(hw, IXGBE_SWSM); + swsm &= ~IXGBE_SWSM_SMBI; + IXGBE_WRITE_REG(hw, IXGBE_SWSM, swsm); + + swsm = IXGBE_READ_REG(hw, IXGBE_SWFW_SYNC); + swsm &= ~IXGBE_SWFW_REGSMP; + IXGBE_WRITE_REG(hw, IXGBE_SWFW_SYNC, swsm); + + IXGBE_WRITE_FLUSH(hw); +} + +/** + * ixgbe_blink_led_start_X540 - Blink LED based on index. + * @hw: pointer to hardware structure + * @index: led number to blink + * + * Devices that implement the version 2 interface: + * X540 + **/ +s32 ixgbe_blink_led_start_X540(struct ixgbe_hw *hw, u32 index) +{ + u32 macc_reg; + u32 ledctl_reg; + ixgbe_link_speed speed; + bool link_up; + + /* + * Link should be up in order for the blink bit in the LED control + * register to work. Force link and speed in the MAC if link is down. + * This will be reversed when we stop the blinking. + */ + hw->mac.ops.check_link(hw, &speed, &link_up, false); + if (link_up == false) { + macc_reg = IXGBE_READ_REG(hw, IXGBE_MACC); + macc_reg |= IXGBE_MACC_FLU | IXGBE_MACC_FSV_10G | IXGBE_MACC_FS; + IXGBE_WRITE_REG(hw, IXGBE_MACC, macc_reg); + } + /* Set the LED to LINK_UP + BLINK. */ + ledctl_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL); + ledctl_reg &= ~IXGBE_LED_MODE_MASK(index); + ledctl_reg |= IXGBE_LED_BLINK(index); + IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, ledctl_reg); + IXGBE_WRITE_FLUSH(hw); + + return 0; +} + +/** + * ixgbe_blink_led_stop_X540 - Stop blinking LED based on index. + * @hw: pointer to hardware structure + * @index: led number to stop blinking + * + * Devices that implement the version 2 interface: + * X540 + **/ +s32 ixgbe_blink_led_stop_X540(struct ixgbe_hw *hw, u32 index) +{ + u32 macc_reg; + u32 ledctl_reg; + + /* Restore the LED to its default value. */ + ledctl_reg = IXGBE_READ_REG(hw, IXGBE_LEDCTL); + ledctl_reg &= ~IXGBE_LED_MODE_MASK(index); + ledctl_reg |= IXGBE_LED_LINK_ACTIVE << IXGBE_LED_MODE_SHIFT(index); + ledctl_reg &= ~IXGBE_LED_BLINK(index); + IXGBE_WRITE_REG(hw, IXGBE_LEDCTL, ledctl_reg); + + /* Unforce link and speed in the MAC. */ + macc_reg = IXGBE_READ_REG(hw, IXGBE_MACC); + macc_reg &= ~(IXGBE_MACC_FLU | IXGBE_MACC_FSV_10G | IXGBE_MACC_FS); + IXGBE_WRITE_REG(hw, IXGBE_MACC, macc_reg); + IXGBE_WRITE_FLUSH(hw); + + return 0; +} diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h new file mode 100644 index 00000000..77e8952d --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/ixgbe_x540.h @@ -0,0 +1,58 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _IXGBE_X540_H_ +#define _IXGBE_X540_H_ + +#include "ixgbe_type.h" + +s32 ixgbe_get_link_capabilities_X540(struct ixgbe_hw *hw, + ixgbe_link_speed *speed, bool *autoneg); +enum ixgbe_media_type ixgbe_get_media_type_X540(struct ixgbe_hw *hw); +s32 ixgbe_setup_mac_link_X540(struct ixgbe_hw *hw, ixgbe_link_speed speed, + bool autoneg, bool link_up_wait_to_complete); +s32 ixgbe_reset_hw_X540(struct ixgbe_hw *hw); +s32 ixgbe_start_hw_X540(struct ixgbe_hw *hw); +u32 ixgbe_get_supported_physical_layer_X540(struct ixgbe_hw *hw); + +s32 ixgbe_init_eeprom_params_X540(struct ixgbe_hw *hw); +s32 ixgbe_read_eerd_X540(struct ixgbe_hw *hw, u16 offset, u16 *data); +s32 ixgbe_read_eerd_buffer_X540(struct ixgbe_hw *hw, u16 offset, u16 words, + u16 *data); +s32 ixgbe_write_eewr_X540(struct ixgbe_hw *hw, u16 offset, u16 data); +s32 ixgbe_write_eewr_buffer_X540(struct ixgbe_hw *hw, u16 offset, u16 words, + u16 *data); +s32 ixgbe_update_eeprom_checksum_X540(struct ixgbe_hw *hw); +s32 ixgbe_validate_eeprom_checksum_X540(struct ixgbe_hw *hw, u16 *checksum_val); +u16 ixgbe_calc_eeprom_checksum_X540(struct ixgbe_hw *hw); + +s32 ixgbe_acquire_swfw_sync_X540(struct ixgbe_hw *hw, u16 mask); +void ixgbe_release_swfw_sync_X540(struct ixgbe_hw *hw, u16 mask); + +s32 ixgbe_blink_led_start_X540(struct ixgbe_hw *hw, u32 index); +s32 ixgbe_blink_led_stop_X540(struct ixgbe_hw *hw, u32 index); +#endif /* _IXGBE_X540_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.c b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.c new file mode 100644 index 00000000..5f2523ed --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.c @@ -0,0 +1,1246 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#include "ixgbe.h" +#include "kcompat.h" + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,8) ) +/* From lib/vsprintf.c */ +#include + +static int skip_atoi(const char **s) +{ + int i=0; + + while (isdigit(**s)) + i = i*10 + *((*s)++) - '0'; + return i; +} + +#define _kc_ZEROPAD 1 /* pad with zero */ +#define _kc_SIGN 2 /* unsigned/signed long */ +#define _kc_PLUS 4 /* show plus */ +#define _kc_SPACE 8 /* space if plus */ +#define _kc_LEFT 16 /* left justified */ +#define _kc_SPECIAL 32 /* 0x */ +#define _kc_LARGE 64 /* use 'ABCDEF' instead of 'abcdef' */ + +static char * number(char * buf, char * end, long long num, int base, int size, int precision, int type) +{ + char c,sign,tmp[66]; + const char *digits; + const char small_digits[] = "0123456789abcdefghijklmnopqrstuvwxyz"; + const char large_digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + int i; + + digits = (type & _kc_LARGE) ? large_digits : small_digits; + if (type & _kc_LEFT) + type &= ~_kc_ZEROPAD; + if (base < 2 || base > 36) + return 0; + c = (type & _kc_ZEROPAD) ? '0' : ' '; + sign = 0; + if (type & _kc_SIGN) { + if (num < 0) { + sign = '-'; + num = -num; + size--; + } else if (type & _kc_PLUS) { + sign = '+'; + size--; + } else if (type & _kc_SPACE) { + sign = ' '; + size--; + } + } + if (type & _kc_SPECIAL) { + if (base == 16) + size -= 2; + else if (base == 8) + size--; + } + i = 0; + if (num == 0) + tmp[i++]='0'; + else while (num != 0) + tmp[i++] = digits[do_div(num,base)]; + if (i > precision) + precision = i; + size -= precision; + if (!(type&(_kc_ZEROPAD+_kc_LEFT))) { + while(size-->0) { + if (buf <= end) + *buf = ' '; + ++buf; + } + } + if (sign) { + if (buf <= end) + *buf = sign; + ++buf; + } + if (type & _kc_SPECIAL) { + if (base==8) { + if (buf <= end) + *buf = '0'; + ++buf; + } else if (base==16) { + if (buf <= end) + *buf = '0'; + ++buf; + if (buf <= end) + *buf = digits[33]; + ++buf; + } + } + if (!(type & _kc_LEFT)) { + while (size-- > 0) { + if (buf <= end) + *buf = c; + ++buf; + } + } + while (i < precision--) { + if (buf <= end) + *buf = '0'; + ++buf; + } + while (i-- > 0) { + if (buf <= end) + *buf = tmp[i]; + ++buf; + } + while (size-- > 0) { + if (buf <= end) + *buf = ' '; + ++buf; + } + return buf; +} + +int _kc_vsnprintf(char *buf, size_t size, const char *fmt, va_list args) +{ + int len; + unsigned long long num; + int i, base; + char *str, *end, c; + const char *s; + + int flags; /* flags to number() */ + + int field_width; /* width of output field */ + int precision; /* min. # of digits for integers; max + number of chars for from string */ + int qualifier; /* 'h', 'l', or 'L' for integer fields */ + /* 'z' support added 23/7/1999 S.H. */ + /* 'z' changed to 'Z' --davidm 1/25/99 */ + + str = buf; + end = buf + size - 1; + + if (end < buf - 1) { + end = ((void *) -1); + size = end - buf + 1; + } + + for (; *fmt ; ++fmt) { + if (*fmt != '%') { + if (str <= end) + *str = *fmt; + ++str; + continue; + } + + /* process flags */ + flags = 0; + repeat: + ++fmt; /* this also skips first '%' */ + switch (*fmt) { + case '-': flags |= _kc_LEFT; goto repeat; + case '+': flags |= _kc_PLUS; goto repeat; + case ' ': flags |= _kc_SPACE; goto repeat; + case '#': flags |= _kc_SPECIAL; goto repeat; + case '0': flags |= _kc_ZEROPAD; goto repeat; + } + + /* get field width */ + field_width = -1; + if (isdigit(*fmt)) + field_width = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + field_width = va_arg(args, int); + if (field_width < 0) { + field_width = -field_width; + flags |= _kc_LEFT; + } + } + + /* get the precision */ + precision = -1; + if (*fmt == '.') { + ++fmt; + if (isdigit(*fmt)) + precision = skip_atoi(&fmt); + else if (*fmt == '*') { + ++fmt; + /* it's the next argument */ + precision = va_arg(args, int); + } + if (precision < 0) + precision = 0; + } + + /* get the conversion qualifier */ + qualifier = -1; + if (*fmt == 'h' || *fmt == 'l' || *fmt == 'L' || *fmt =='Z') { + qualifier = *fmt; + ++fmt; + } + + /* default base */ + base = 10; + + switch (*fmt) { + case 'c': + if (!(flags & _kc_LEFT)) { + while (--field_width > 0) { + if (str <= end) + *str = ' '; + ++str; + } + } + c = (unsigned char) va_arg(args, int); + if (str <= end) + *str = c; + ++str; + while (--field_width > 0) { + if (str <= end) + *str = ' '; + ++str; + } + continue; + + case 's': + s = va_arg(args, char *); + if (!s) + s = ""; + + len = strnlen(s, precision); + + if (!(flags & _kc_LEFT)) { + while (len < field_width--) { + if (str <= end) + *str = ' '; + ++str; + } + } + for (i = 0; i < len; ++i) { + if (str <= end) + *str = *s; + ++str; ++s; + } + while (len < field_width--) { + if (str <= end) + *str = ' '; + ++str; + } + continue; + + case 'p': + if (field_width == -1) { + field_width = 2*sizeof(void *); + flags |= _kc_ZEROPAD; + } + str = number(str, end, + (unsigned long) va_arg(args, void *), + 16, field_width, precision, flags); + continue; + + + case 'n': + /* FIXME: + * What does C99 say about the overflow case here? */ + if (qualifier == 'l') { + long * ip = va_arg(args, long *); + *ip = (str - buf); + } else if (qualifier == 'Z') { + size_t * ip = va_arg(args, size_t *); + *ip = (str - buf); + } else { + int * ip = va_arg(args, int *); + *ip = (str - buf); + } + continue; + + case '%': + if (str <= end) + *str = '%'; + ++str; + continue; + + /* integer number formats - set up the flags and "break" */ + case 'o': + base = 8; + break; + + case 'X': + flags |= _kc_LARGE; + case 'x': + base = 16; + break; + + case 'd': + case 'i': + flags |= _kc_SIGN; + case 'u': + break; + + default: + if (str <= end) + *str = '%'; + ++str; + if (*fmt) { + if (str <= end) + *str = *fmt; + ++str; + } else { + --fmt; + } + continue; + } + if (qualifier == 'L') + num = va_arg(args, long long); + else if (qualifier == 'l') { + num = va_arg(args, unsigned long); + if (flags & _kc_SIGN) + num = (signed long) num; + } else if (qualifier == 'Z') { + num = va_arg(args, size_t); + } else if (qualifier == 'h') { + num = (unsigned short) va_arg(args, int); + if (flags & _kc_SIGN) + num = (signed short) num; + } else { + num = va_arg(args, unsigned int); + if (flags & _kc_SIGN) + num = (signed int) num; + } + str = number(str, end, num, base, + field_width, precision, flags); + } + if (str <= end) + *str = '\0'; + else if (size > 0) + /* don't write out a null byte if the buf size is zero */ + *end = '\0'; + /* the trailing null byte doesn't count towards the total + * ++str; + */ + return str-buf; +} + +int _kc_snprintf(char * buf, size_t size, const char *fmt, ...) +{ + va_list args; + int i; + + va_start(args, fmt); + i = _kc_vsnprintf(buf,size,fmt,args); + va_end(args); + return i; +} +#endif /* < 2.4.8 */ + + + +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) ) +#ifdef CONFIG_PCI_IOV +int __kc_pci_vfs_assigned(struct pci_dev *dev) +{ + unsigned int vfs_assigned = 0; +#ifdef HAVE_PCI_DEV_FLAGS_ASSIGNED + int pos; + struct pci_dev *vfdev; + unsigned short dev_id; + + /* only search if we are a PF */ + if (!dev->is_physfn) + return 0; + + /* find SR-IOV capability */ + pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV); + if (!pos) + return 0; + + /* + * * determine the device ID for the VFs, the vendor ID will be the + * * same as the PF so there is no need to check for that one + * */ + pci_read_config_word(dev, pos + PCI_SRIOV_VF_DID, &dev_id); + + /* loop through all the VFs to see if we own any that are assigned */ + vfdev = pci_get_device(dev->vendor, dev_id, NULL); + while (vfdev) { + /* + * * It is considered assigned if it is a virtual function with + * * our dev as the physical function and the assigned bit is set + * */ + if (vfdev->is_virtfn && (vfdev->physfn == dev) && + (vfdev->dev_flags & PCI_DEV_FLAGS_ASSIGNED)) + vfs_assigned++; + + vfdev = pci_get_device(dev->vendor, dev_id, vfdev); + } + +#endif /* HAVE_PCI_DEV_FLAGS_ASSIGNED */ + return vfs_assigned; +} + +#endif /* CONFIG_PCI_IOV */ +#endif /* 3.10.0 */ + + + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,13) ) + +/**************************************/ +/* PCI DMA MAPPING */ + +#if defined(CONFIG_HIGHMEM) + +#ifndef PCI_DRAM_OFFSET +#define PCI_DRAM_OFFSET 0 +#endif + +u64 +_kc_pci_map_page(struct pci_dev *dev, struct page *page, unsigned long offset, + size_t size, int direction) +{ + return ((u64) (page - mem_map) << PAGE_SHIFT) + offset + + PCI_DRAM_OFFSET; +} + +#else /* CONFIG_HIGHMEM */ + +u64 +_kc_pci_map_page(struct pci_dev *dev, struct page *page, unsigned long offset, + size_t size, int direction) +{ + return pci_map_single(dev, (void *)page_address(page) + offset, size, + direction); +} + +#endif /* CONFIG_HIGHMEM */ + +void +_kc_pci_unmap_page(struct pci_dev *dev, u64 dma_addr, size_t size, + int direction) +{ + return pci_unmap_single(dev, dma_addr, size, direction); +} + +#endif /* 2.4.13 => 2.4.3 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,3) ) + +/**************************************/ +/* PCI DRIVER API */ + +int +_kc_pci_set_dma_mask(struct pci_dev *dev, dma_addr_t mask) +{ + if (!pci_dma_supported(dev, mask)) + return -EIO; + dev->dma_mask = mask; + return 0; +} + +int +_kc_pci_request_regions(struct pci_dev *dev, char *res_name) +{ + int i; + + for (i = 0; i < 6; i++) { + if (pci_resource_len(dev, i) == 0) + continue; + + if (pci_resource_flags(dev, i) & IORESOURCE_IO) { + if (!request_region(pci_resource_start(dev, i), pci_resource_len(dev, i), res_name)) { + pci_release_regions(dev); + return -EBUSY; + } + } else if (pci_resource_flags(dev, i) & IORESOURCE_MEM) { + if (!request_mem_region(pci_resource_start(dev, i), pci_resource_len(dev, i), res_name)) { + pci_release_regions(dev); + return -EBUSY; + } + } + } + return 0; +} + +void +_kc_pci_release_regions(struct pci_dev *dev) +{ + int i; + + for (i = 0; i < 6; i++) { + if (pci_resource_len(dev, i) == 0) + continue; + + if (pci_resource_flags(dev, i) & IORESOURCE_IO) + release_region(pci_resource_start(dev, i), pci_resource_len(dev, i)); + + else if (pci_resource_flags(dev, i) & IORESOURCE_MEM) + release_mem_region(pci_resource_start(dev, i), pci_resource_len(dev, i)); + } +} + +/**************************************/ +/* NETWORK DRIVER API */ + +struct net_device * +_kc_alloc_etherdev(int sizeof_priv) +{ + struct net_device *dev; + int alloc_size; + + alloc_size = sizeof(*dev) + sizeof_priv + IFNAMSIZ + 31; + dev = kzalloc(alloc_size, GFP_KERNEL); + if (!dev) + return NULL; + + if (sizeof_priv) + dev->priv = (void *) (((unsigned long)(dev + 1) + 31) & ~31); + dev->name[0] = '\0'; + ether_setup(dev); + + return dev; +} + +int +_kc_is_valid_ether_addr(u8 *addr) +{ + const char zaddr[6] = { 0, }; + + return !(addr[0] & 1) && memcmp(addr, zaddr, 6); +} + +#endif /* 2.4.3 => 2.4.0 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,6) ) + +int +_kc_pci_set_power_state(struct pci_dev *dev, int state) +{ + return 0; +} + +int +_kc_pci_enable_wake(struct pci_dev *pdev, u32 state, int enable) +{ + return 0; +} + +#endif /* 2.4.6 => 2.4.3 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) ) +void _kc_skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, + int off, int size) +{ + skb_frag_t *frag = &skb_shinfo(skb)->frags[i]; + frag->page = page; + frag->page_offset = off; + frag->size = size; + skb_shinfo(skb)->nr_frags = i + 1; +} + +/* + * Original Copyright: + * find_next_bit.c: fallback find next bit implementation + * + * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + */ + +/** + * find_next_bit - find the next set bit in a memory region + * @addr: The address to base the search on + * @offset: The bitnumber to start searching at + * @size: The maximum size to search + */ +unsigned long find_next_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) +{ + const unsigned long *p = addr + BITOP_WORD(offset); + unsigned long result = offset & ~(BITS_PER_LONG-1); + unsigned long tmp; + + if (offset >= size) + return size; + size -= result; + offset %= BITS_PER_LONG; + if (offset) { + tmp = *(p++); + tmp &= (~0UL << offset); + if (size < BITS_PER_LONG) + goto found_first; + if (tmp) + goto found_middle; + size -= BITS_PER_LONG; + result += BITS_PER_LONG; + } + while (size & ~(BITS_PER_LONG-1)) { + if ((tmp = *(p++))) + goto found_middle; + result += BITS_PER_LONG; + size -= BITS_PER_LONG; + } + if (!size) + return result; + tmp = *p; + +found_first: + tmp &= (~0UL >> (BITS_PER_LONG - size)); + if (tmp == 0UL) /* Are any bits set? */ + return result + size; /* Nope. */ +found_middle: + return result + ffs(tmp); +} + +size_t _kc_strlcpy(char *dest, const char *src, size_t size) +{ + size_t ret = strlen(src); + + if (size) { + size_t len = (ret >= size) ? size - 1 : ret; + memcpy(dest, src, len); + dest[len] = '\0'; + } + return ret; +} + +#endif /* 2.6.0 => 2.4.6 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) ) +int _kc_scnprintf(char * buf, size_t size, const char *fmt, ...) +{ + va_list args; + int i; + + va_start(args, fmt); + i = vsnprintf(buf, size, fmt, args); + va_end(args); + return (i >= size) ? (size - 1) : i; +} +#endif /* < 2.6.4 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10) ) +DECLARE_BITMAP(_kcompat_node_online_map, MAX_NUMNODES) = {1}; +#endif /* < 2.6.10 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,13) ) +char *_kc_kstrdup(const char *s, unsigned int gfp) +{ + size_t len; + char *buf; + + if (!s) + return NULL; + + len = strlen(s) + 1; + buf = kmalloc(len, gfp); + if (buf) + memcpy(buf, s, len); + return buf; +} +#endif /* < 2.6.13 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14) ) +void *_kc_kzalloc(size_t size, int flags) +{ + void *ret = kmalloc(size, flags); + if (ret) + memset(ret, 0, size); + return ret; +} +#endif /* <= 2.6.13 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) ) +int _kc_skb_pad(struct sk_buff *skb, int pad) +{ + int ntail; + + /* If the skbuff is non linear tailroom is always zero.. */ + if(!skb_cloned(skb) && skb_tailroom(skb) >= pad) { + memset(skb->data+skb->len, 0, pad); + return 0; + } + + ntail = skb->data_len + pad - (skb->end - skb->tail); + if (likely(skb_cloned(skb) || ntail > 0)) { + if (pskb_expand_head(skb, 0, ntail, GFP_ATOMIC)); + goto free_skb; + } + +#ifdef MAX_SKB_FRAGS + if (skb_is_nonlinear(skb) && + !__pskb_pull_tail(skb, skb->data_len)) + goto free_skb; + +#endif + memset(skb->data + skb->len, 0, pad); + return 0; + +free_skb: + kfree_skb(skb); + return -ENOMEM; +} + +#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,4))) +int _kc_pci_save_state(struct pci_dev *pdev) +{ + struct adapter_struct *adapter = pci_get_drvdata(pdev); + int size = PCI_CONFIG_SPACE_LEN, i; + u16 pcie_cap_offset, pcie_link_status; + +#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) ) + /* no ->dev for 2.4 kernels */ + WARN_ON(pdev->dev.driver_data == NULL); +#endif + pcie_cap_offset = pci_find_capability(pdev, PCI_CAP_ID_EXP); + if (pcie_cap_offset) { + if (!pci_read_config_word(pdev, + pcie_cap_offset + PCIE_LINK_STATUS, + &pcie_link_status)) + size = PCIE_CONFIG_SPACE_LEN; + } + pci_config_space_ich8lan(); +#ifdef HAVE_PCI_ERS + if (adapter->config_space == NULL) +#else + WARN_ON(adapter->config_space != NULL); +#endif + adapter->config_space = kmalloc(size, GFP_KERNEL); + if (!adapter->config_space) { + printk(KERN_ERR "Out of memory in pci_save_state\n"); + return -ENOMEM; + } + for (i = 0; i < (size / 4); i++) + pci_read_config_dword(pdev, i * 4, &adapter->config_space[i]); + return 0; +} + +void _kc_pci_restore_state(struct pci_dev *pdev) +{ + struct adapter_struct *adapter = pci_get_drvdata(pdev); + int size = PCI_CONFIG_SPACE_LEN, i; + u16 pcie_cap_offset; + u16 pcie_link_status; + + if (adapter->config_space != NULL) { + pcie_cap_offset = pci_find_capability(pdev, PCI_CAP_ID_EXP); + if (pcie_cap_offset && + !pci_read_config_word(pdev, + pcie_cap_offset + PCIE_LINK_STATUS, + &pcie_link_status)) + size = PCIE_CONFIG_SPACE_LEN; + + pci_config_space_ich8lan(); + for (i = 0; i < (size / 4); i++) + pci_write_config_dword(pdev, i * 4, adapter->config_space[i]); +#ifndef HAVE_PCI_ERS + kfree(adapter->config_space); + adapter->config_space = NULL; +#endif + } +} +#endif /* !(RHEL_RELEASE_CODE >= RHEL 5.4) */ + +#ifdef HAVE_PCI_ERS +void _kc_free_netdev(struct net_device *netdev) +{ + struct adapter_struct *adapter = netdev_priv(netdev); + + if (adapter->config_space != NULL) + kfree(adapter->config_space); +#ifdef CONFIG_SYSFS + if (netdev->reg_state == NETREG_UNINITIALIZED) { + kfree((char *)netdev - netdev->padded); + } else { + BUG_ON(netdev->reg_state != NETREG_UNREGISTERED); + netdev->reg_state = NETREG_RELEASED; + class_device_put(&netdev->class_dev); + } +#else + kfree((char *)netdev - netdev->padded); +#endif +} +#endif + +void *_kc_kmemdup(const void *src, size_t len, unsigned gfp) +{ + void *p; + + p = kzalloc(len, gfp); + if (p) + memcpy(p, src, len); + return p; +} +#endif /* <= 2.6.19 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) ) +/* hexdump code taken from lib/hexdump.c */ +static void _kc_hex_dump_to_buffer(const void *buf, size_t len, int rowsize, + int groupsize, unsigned char *linebuf, + size_t linebuflen, bool ascii) +{ + const u8 *ptr = buf; + u8 ch; + int j, lx = 0; + int ascii_column; + + if (rowsize != 16 && rowsize != 32) + rowsize = 16; + + if (!len) + goto nil; + if (len > rowsize) /* limit to one line at a time */ + len = rowsize; + if ((len % groupsize) != 0) /* no mixed size output */ + groupsize = 1; + + switch (groupsize) { + case 8: { + const u64 *ptr8 = buf; + int ngroups = len / groupsize; + + for (j = 0; j < ngroups; j++) + lx += scnprintf((char *)(linebuf + lx), linebuflen - lx, + "%s%16.16llx", j ? " " : "", + (unsigned long long)*(ptr8 + j)); + ascii_column = 17 * ngroups + 2; + break; + } + + case 4: { + const u32 *ptr4 = buf; + int ngroups = len / groupsize; + + for (j = 0; j < ngroups; j++) + lx += scnprintf((char *)(linebuf + lx), linebuflen - lx, + "%s%8.8x", j ? " " : "", *(ptr4 + j)); + ascii_column = 9 * ngroups + 2; + break; + } + + case 2: { + const u16 *ptr2 = buf; + int ngroups = len / groupsize; + + for (j = 0; j < ngroups; j++) + lx += scnprintf((char *)(linebuf + lx), linebuflen - lx, + "%s%4.4x", j ? " " : "", *(ptr2 + j)); + ascii_column = 5 * ngroups + 2; + break; + } + + default: + for (j = 0; (j < len) && (lx + 3) <= linebuflen; j++) { + ch = ptr[j]; + linebuf[lx++] = hex_asc(ch >> 4); + linebuf[lx++] = hex_asc(ch & 0x0f); + linebuf[lx++] = ' '; + } + if (j) + lx--; + + ascii_column = 3 * rowsize + 2; + break; + } + if (!ascii) + goto nil; + + while (lx < (linebuflen - 1) && lx < (ascii_column - 1)) + linebuf[lx++] = ' '; + for (j = 0; (j < len) && (lx + 2) < linebuflen; j++) + linebuf[lx++] = (isascii(ptr[j]) && isprint(ptr[j])) ? ptr[j] + : '.'; +nil: + linebuf[lx++] = '\0'; +} + +void _kc_print_hex_dump(const char *level, + const char *prefix_str, int prefix_type, + int rowsize, int groupsize, + const void *buf, size_t len, bool ascii) +{ + const u8 *ptr = buf; + int i, linelen, remaining = len; + unsigned char linebuf[200]; + + if (rowsize != 16 && rowsize != 32) + rowsize = 16; + + for (i = 0; i < len; i += rowsize) { + linelen = min(remaining, rowsize); + remaining -= rowsize; + _kc_hex_dump_to_buffer(ptr + i, linelen, rowsize, groupsize, + linebuf, sizeof(linebuf), ascii); + + switch (prefix_type) { + case DUMP_PREFIX_ADDRESS: + printk("%s%s%*p: %s\n", level, prefix_str, + (int)(2 * sizeof(void *)), ptr + i, linebuf); + break; + case DUMP_PREFIX_OFFSET: + printk("%s%s%.8x: %s\n", level, prefix_str, i, linebuf); + break; + default: + printk("%s%s%s\n", level, prefix_str, linebuf); + break; + } + } +} +#endif /* < 2.6.22 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) ) +int ixgbe_dcb_netlink_register(void) +{ + return 0; +} + +int ixgbe_dcb_netlink_unregister(void) +{ + return 0; +} + +int ixgbe_copy_dcb_cfg(struct ixgbe_adapter *adapter, int tc_max) +{ + return 0; +} +#endif /* < 2.6.23 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) ) +#ifdef NAPI +struct net_device *napi_to_poll_dev(struct napi_struct *napi) +{ + struct adapter_q_vector *q_vector = container_of(napi, + struct adapter_q_vector, + napi); + return &q_vector->poll_dev; +} + +int __kc_adapter_clean(struct net_device *netdev, int *budget) +{ + int work_done; + int work_to_do = min(*budget, netdev->quota); + /* kcompat.h netif_napi_add puts napi struct in "fake netdev->priv" */ + struct napi_struct *napi = netdev->priv; + work_done = napi->poll(napi, work_to_do); + *budget -= work_done; + netdev->quota -= work_done; + return (work_done >= work_to_do) ? 1 : 0; +} +#endif /* NAPI */ +#endif /* <= 2.6.24 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) ) +void _kc_pci_disable_link_state(struct pci_dev *pdev, int state) +{ + struct pci_dev *parent = pdev->bus->self; + u16 link_state; + int pos; + + if (!parent) + return; + + pos = pci_find_capability(parent, PCI_CAP_ID_EXP); + if (pos) { + pci_read_config_word(parent, pos + PCI_EXP_LNKCTL, &link_state); + link_state &= ~state; + pci_write_config_word(parent, pos + PCI_EXP_LNKCTL, link_state); + } +} +#endif /* < 2.6.26 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) ) +#ifdef HAVE_TX_MQ +void _kc_netif_tx_stop_all_queues(struct net_device *netdev) +{ + struct adapter_struct *adapter = netdev_priv(netdev); + int i; + + netif_stop_queue(netdev); + if (netif_is_multiqueue(netdev)) + for (i = 0; i < adapter->num_tx_queues; i++) + netif_stop_subqueue(netdev, i); +} +void _kc_netif_tx_wake_all_queues(struct net_device *netdev) +{ + struct adapter_struct *adapter = netdev_priv(netdev); + int i; + + netif_wake_queue(netdev); + if (netif_is_multiqueue(netdev)) + for (i = 0; i < adapter->num_tx_queues; i++) + netif_wake_subqueue(netdev, i); +} +void _kc_netif_tx_start_all_queues(struct net_device *netdev) +{ + struct adapter_struct *adapter = netdev_priv(netdev); + int i; + + netif_start_queue(netdev); + if (netif_is_multiqueue(netdev)) + for (i = 0; i < adapter->num_tx_queues; i++) + netif_start_subqueue(netdev, i); +} +#endif /* HAVE_TX_MQ */ + +#ifndef __WARN_printf +void __kc_warn_slowpath(const char *file, int line, const char *fmt, ...) +{ + va_list args; + + printk(KERN_WARNING "------------[ cut here ]------------\n"); + printk(KERN_WARNING "WARNING: at %s:%d %s()\n", file, line); + va_start(args, fmt); + vprintk(fmt, args); + va_end(args); + + dump_stack(); +} +#endif /* __WARN_printf */ +#endif /* < 2.6.27 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) ) + +int +_kc_pci_prepare_to_sleep(struct pci_dev *dev) +{ + pci_power_t target_state; + int error; + + target_state = pci_choose_state(dev, PMSG_SUSPEND); + + pci_enable_wake(dev, target_state, true); + + error = pci_set_power_state(dev, target_state); + + if (error) + pci_enable_wake(dev, target_state, false); + + return error; +} + +int +_kc_pci_wake_from_d3(struct pci_dev *dev, bool enable) +{ + int err; + + err = pci_enable_wake(dev, PCI_D3cold, enable); + if (err) + goto out; + + err = pci_enable_wake(dev, PCI_D3hot, enable); + +out: + return err; +} +#endif /* < 2.6.28 */ + +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0) ) +void _kc_skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, + int off, int size) +{ + skb_fill_page_desc(skb, i, page, off, size); + skb->len += size; + skb->data_len += size; + skb->truesize += size; +} +#endif /* < 3.4.0 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30) ) +#ifdef HAVE_NETDEV_SELECT_QUEUE +#include +static u32 _kc_simple_tx_hashrnd; +static u32 _kc_simple_tx_hashrnd_initialized; + +u16 _kc_skb_tx_hash(struct net_device *dev, struct sk_buff *skb) +{ + u32 addr1, addr2, ports; + u32 hash, ihl; + u8 ip_proto = 0; + + if (unlikely(!_kc_simple_tx_hashrnd_initialized)) { + get_random_bytes(&_kc_simple_tx_hashrnd, 4); + _kc_simple_tx_hashrnd_initialized = 1; + } + + switch (skb->protocol) { + case htons(ETH_P_IP): + if (!(ip_hdr(skb)->frag_off & htons(IP_MF | IP_OFFSET))) + ip_proto = ip_hdr(skb)->protocol; + addr1 = ip_hdr(skb)->saddr; + addr2 = ip_hdr(skb)->daddr; + ihl = ip_hdr(skb)->ihl; + break; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + case htons(ETH_P_IPV6): + ip_proto = ipv6_hdr(skb)->nexthdr; + addr1 = ipv6_hdr(skb)->saddr.s6_addr32[3]; + addr2 = ipv6_hdr(skb)->daddr.s6_addr32[3]; + ihl = (40 >> 2); + break; +#endif + default: + return 0; + } + + + switch (ip_proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + case IPPROTO_DCCP: + case IPPROTO_ESP: + case IPPROTO_AH: + case IPPROTO_SCTP: + case IPPROTO_UDPLITE: + ports = *((u32 *) (skb_network_header(skb) + (ihl * 4))); + break; + + default: + ports = 0; + break; + } + + hash = jhash_3words(addr1, addr2, ports, _kc_simple_tx_hashrnd); + + return (u16) (((u64) hash * dev->real_num_tx_queues) >> 32); +} +#endif /* HAVE_NETDEV_SELECT_QUEUE */ +#endif /* < 2.6.30 */ + +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35) ) +#ifdef HAVE_TX_MQ +#ifndef CONFIG_NETDEVICES_MULTIQUEUE +void _kc_netif_set_real_num_tx_queues(struct net_device *dev, unsigned int txq) +{ + unsigned int real_num = dev->real_num_tx_queues; + struct Qdisc *qdisc; + int i; + + if (unlikely(txq > dev->num_tx_queues)) + ; + else if (txq > real_num) + dev->real_num_tx_queues = txq; + else if ( txq < real_num) { + dev->real_num_tx_queues = txq; + for (i = txq; i < dev->num_tx_queues; i++) { + qdisc = netdev_get_tx_queue(dev, i)->qdisc; + if (qdisc) { + spin_lock_bh(qdisc_lock(qdisc)); + qdisc_reset(qdisc); + spin_unlock_bh(qdisc_lock(qdisc)); + } + } + } +} +#endif /* CONFIG_NETDEVICES_MULTIQUEUE */ +#endif /* HAVE_TX_MQ */ +#endif /* < 2.6.35 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) ) +static const u32 _kc_flags_dup_features = + (ETH_FLAG_LRO | ETH_FLAG_NTUPLE | ETH_FLAG_RXHASH); + +u32 _kc_ethtool_op_get_flags(struct net_device *dev) +{ + return dev->features & _kc_flags_dup_features; +} + +int _kc_ethtool_op_set_flags(struct net_device *dev, u32 data, u32 supported) +{ + if (data & ~supported) + return -EINVAL; + + dev->features = ((dev->features & ~_kc_flags_dup_features) | + (data & _kc_flags_dup_features)); + return 0; +} +#endif /* < 2.6.36 */ + +/******************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39) ) +#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(6,0))) +u8 _kc_netdev_get_num_tc(struct net_device *dev) +{ + struct adapter_struct *kc_adapter = netdev_priv(dev); + if (kc_adapter->flags & IXGBE_FLAG_DCB_ENABLED) + return kc_adapter->tc; + else + return 0; +} + +u8 _kc_netdev_get_prio_tc_map(struct net_device *dev, u8 up) +{ + struct adapter_struct *kc_adapter = netdev_priv(dev); + int tc; + u8 map; + + for (tc = 0; tc < IXGBE_DCB_MAX_TRAFFIC_CLASS; tc++) { + map = kc_adapter->dcb_cfg.tc_config[tc].path[0].up_to_tc_bitmap; + + if (map & (1 << up)) + return tc; + } + + return 0; +} +#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(6,0)) */ +#endif /* < 2.6.39 */ diff --git a/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h new file mode 100644 index 00000000..bf27579b --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/ethtool/ixgbe/kcompat.h @@ -0,0 +1,3143 @@ +/******************************************************************************* + + Intel 10 Gigabit PCI Express Linux driver + Copyright(c) 1999 - 2012 Intel Corporation. + + This program is free software; you can redistribute it and/or modify it + under the terms and conditions of the GNU General Public License, + version 2, as published by the Free Software Foundation. + + This program is distributed in the hope it will be useful, but WITHOUT + ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + You should have received a copy of the GNU General Public License along with + this program; if not, write to the Free Software Foundation, Inc., + 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + + The full GNU General Public License is included in this distribution in + the file called "COPYING". + + Contact Information: + e1000-devel Mailing List + Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497 + +*******************************************************************************/ + +#ifndef _KCOMPAT_H_ +#define _KCOMPAT_H_ + +#ifndef LINUX_VERSION_CODE +#include +#else +#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c)) +#endif +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* NAPI enable/disable flags here */ +/* enable NAPI for ixgbe by default */ +#undef CONFIG_IXGBE_NAPI +#define CONFIG_IXGBE_NAPI +#define NAPI +#ifdef CONFIG_IXGBE_NAPI +#undef NAPI +#define NAPI +#endif /* CONFIG_IXGBE_NAPI */ +#ifdef IXGBE_NAPI +#undef NAPI +#define NAPI +#endif /* IXGBE_NAPI */ +#ifdef IXGBE_NO_NAPI +#undef NAPI +#endif /* IXGBE_NO_NAPI */ + +#define adapter_struct ixgbe_adapter +#define adapter_q_vector ixgbe_q_vector + +/* and finally set defines so that the code sees the changes */ +#ifdef NAPI +#ifndef CONFIG_IXGBE_NAPI +#define CONFIG_IXGBE_NAPI +#endif +#else +#undef CONFIG_IXGBE_NAPI +#endif /* NAPI */ + +/* packet split disable/enable */ +#ifdef DISABLE_PACKET_SPLIT +#ifndef CONFIG_IXGBE_DISABLE_PACKET_SPLIT +#define CONFIG_IXGBE_DISABLE_PACKET_SPLIT +#endif +#endif /* DISABLE_PACKET_SPLIT */ + +/* MSI compatibility code for all kernels and drivers */ +#ifdef DISABLE_PCI_MSI +#undef CONFIG_PCI_MSI +#endif +#ifndef CONFIG_PCI_MSI +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,8) ) +struct msix_entry { + u16 vector; /* kernel uses to write allocated vector */ + u16 entry; /* driver uses to specify entry, OS writes */ +}; +#endif +#undef pci_enable_msi +#define pci_enable_msi(a) -ENOTSUPP +#undef pci_disable_msi +#define pci_disable_msi(a) do {} while (0) +#undef pci_enable_msix +#define pci_enable_msix(a, b, c) -ENOTSUPP +#undef pci_disable_msix +#define pci_disable_msix(a) do {} while (0) +#define msi_remove_pci_irq_vectors(a) do {} while (0) +#endif /* CONFIG_PCI_MSI */ +#ifdef DISABLE_PM +#undef CONFIG_PM +#endif + +#ifdef DISABLE_NET_POLL_CONTROLLER +#undef CONFIG_NET_POLL_CONTROLLER +#endif + +#ifndef PMSG_SUSPEND +#define PMSG_SUSPEND 3 +#endif + +/* generic boolean compatibility */ +#undef TRUE +#undef FALSE +#define TRUE true +#define FALSE false +#ifdef GCC_VERSION +#if ( GCC_VERSION < 3000 ) +#define _Bool char +#endif +#else +#define _Bool char +#endif + +/* kernels less than 2.4.14 don't have this */ +#ifndef ETH_P_8021Q +#define ETH_P_8021Q 0x8100 +#endif + +#ifndef module_param +#define module_param(v,t,p) MODULE_PARM(v, "i"); +#endif + +#ifndef DMA_64BIT_MASK +#define DMA_64BIT_MASK 0xffffffffffffffffULL +#endif + +#ifndef DMA_32BIT_MASK +#define DMA_32BIT_MASK 0x00000000ffffffffULL +#endif + +#ifndef PCI_CAP_ID_EXP +#define PCI_CAP_ID_EXP 0x10 +#endif + +#ifndef PCIE_LINK_STATE_L0S +#define PCIE_LINK_STATE_L0S 1 +#endif +#ifndef PCIE_LINK_STATE_L1 +#define PCIE_LINK_STATE_L1 2 +#endif + +#ifndef mmiowb +#ifdef CONFIG_IA64 +#define mmiowb() asm volatile ("mf.a" ::: "memory") +#else +#define mmiowb() +#endif +#endif + +#ifndef SET_NETDEV_DEV +#define SET_NETDEV_DEV(net, pdev) +#endif + +#if !defined(HAVE_FREE_NETDEV) && ( LINUX_VERSION_CODE < KERNEL_VERSION(3,1,0) ) +#define free_netdev(x) kfree(x) +#endif + +#ifdef HAVE_POLL_CONTROLLER +#define CONFIG_NET_POLL_CONTROLLER +#endif + +#ifndef SKB_DATAREF_SHIFT +/* if we do not have the infrastructure to detect if skb_header is cloned + just return false in all cases */ +#define skb_header_cloned(x) 0 +#endif + +#ifndef NETIF_F_GSO +#define gso_size tso_size +#define gso_segs tso_segs +#endif + +#ifndef NETIF_F_GRO +#define vlan_gro_receive(_napi, _vlgrp, _vlan, _skb) \ + vlan_hwaccel_receive_skb(_skb, _vlgrp, _vlan) +#define napi_gro_receive(_napi, _skb) netif_receive_skb(_skb) +#endif + +#ifndef NETIF_F_SCTP_CSUM +#define NETIF_F_SCTP_CSUM 0 +#endif + +#ifndef NETIF_F_LRO +#define NETIF_F_LRO (1 << 15) +#endif + +#ifndef NETIF_F_NTUPLE +#define NETIF_F_NTUPLE (1 << 27) +#endif + +#ifndef IPPROTO_SCTP +#define IPPROTO_SCTP 132 +#endif + +#ifndef CHECKSUM_PARTIAL +#define CHECKSUM_PARTIAL CHECKSUM_HW +#define CHECKSUM_COMPLETE CHECKSUM_HW +#endif + +#ifndef __read_mostly +#define __read_mostly +#endif + +#ifndef MII_RESV1 +#define MII_RESV1 0x17 /* Reserved... */ +#endif + +#ifndef unlikely +#define unlikely(_x) _x +#define likely(_x) _x +#endif + +#ifndef WARN_ON +#define WARN_ON(x) +#endif + +#ifndef PCI_DEVICE +#define PCI_DEVICE(vend,dev) \ + .vendor = (vend), .device = (dev), \ + .subvendor = PCI_ANY_ID, .subdevice = PCI_ANY_ID +#endif + +#ifndef node_online +#define node_online(node) ((node) == 0) +#endif + +#ifndef num_online_cpus +#define num_online_cpus() smp_num_cpus +#endif + +#ifndef cpu_online +#define cpu_online(cpuid) test_bit((cpuid), &cpu_online_map) +#endif + +#ifndef _LINUX_RANDOM_H +#include +#endif + +#ifndef DECLARE_BITMAP +#ifndef BITS_TO_LONGS +#define BITS_TO_LONGS(bits) (((bits)+BITS_PER_LONG-1)/BITS_PER_LONG) +#endif +#define DECLARE_BITMAP(name,bits) long name[BITS_TO_LONGS(bits)] +#endif + +#ifndef VLAN_HLEN +#define VLAN_HLEN 4 +#endif + +#ifndef VLAN_ETH_HLEN +#define VLAN_ETH_HLEN 18 +#endif + +#ifndef VLAN_ETH_FRAME_LEN +#define VLAN_ETH_FRAME_LEN 1518 +#endif + +#if !defined(IXGBE_DCA) && !defined(IGB_DCA) +#define dca_get_tag(b) 0 +#define dca_add_requester(a) -1 +#define dca_remove_requester(b) do { } while(0) +#define DCA_PROVIDER_ADD 0x0001 +#define DCA_PROVIDER_REMOVE 0x0002 +#endif + +#ifndef DCA_GET_TAG_TWO_ARGS +#define dca3_get_tag(a,b) dca_get_tag(b) +#endif + +#ifndef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS +#if defined(__i386__) || defined(__x86_64__) +#define CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS +#endif +#endif + +/* taken from 2.6.24 definition in linux/kernel.h */ +#ifndef IS_ALIGNED +#define IS_ALIGNED(x,a) (((x) % ((typeof(x))(a))) == 0) +#endif + +#ifndef NETIF_F_HW_VLAN_TX +struct _kc_vlan_ethhdr { + unsigned char h_dest[ETH_ALEN]; + unsigned char h_source[ETH_ALEN]; + __be16 h_vlan_proto; + __be16 h_vlan_TCI; + __be16 h_vlan_encapsulated_proto; +}; +#define vlan_ethhdr _kc_vlan_ethhdr +struct _kc_vlan_hdr { + __be16 h_vlan_TCI; + __be16 h_vlan_encapsulated_proto; +}; +#define vlan_hdr _kc_vlan_hdr +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) ) +#define vlan_tx_tag_present(_skb) 0 +#define vlan_tx_tag_get(_skb) 0 +#endif +#endif + +#ifndef VLAN_PRIO_SHIFT +#define VLAN_PRIO_SHIFT 13 +#endif + + +#ifndef __GFP_COLD +#define __GFP_COLD 0 +#endif + +/*****************************************************************************/ +/* Installations with ethtool version without eeprom, adapter id, or statistics + * support */ + +#ifndef ETH_GSTRING_LEN +#define ETH_GSTRING_LEN 32 +#endif + +#ifndef ETHTOOL_GSTATS +#define ETHTOOL_GSTATS 0x1d +#undef ethtool_drvinfo +#define ethtool_drvinfo k_ethtool_drvinfo +struct k_ethtool_drvinfo { + u32 cmd; + char driver[32]; + char version[32]; + char fw_version[32]; + char bus_info[32]; + char reserved1[32]; + char reserved2[16]; + u32 n_stats; + u32 testinfo_len; + u32 eedump_len; + u32 regdump_len; +}; + +struct ethtool_stats { + u32 cmd; + u32 n_stats; + u64 data[0]; +}; +#endif /* ETHTOOL_GSTATS */ + +#ifndef ETHTOOL_PHYS_ID +#define ETHTOOL_PHYS_ID 0x1c +#endif /* ETHTOOL_PHYS_ID */ + +#ifndef ETHTOOL_GSTRINGS +#define ETHTOOL_GSTRINGS 0x1b +enum ethtool_stringset { + ETH_SS_TEST = 0, + ETH_SS_STATS, +}; +struct ethtool_gstrings { + u32 cmd; /* ETHTOOL_GSTRINGS */ + u32 string_set; /* string set id e.c. ETH_SS_TEST, etc*/ + u32 len; /* number of strings in the string set */ + u8 data[0]; +}; +#endif /* ETHTOOL_GSTRINGS */ + +#ifndef ETHTOOL_TEST +#define ETHTOOL_TEST 0x1a +enum ethtool_test_flags { + ETH_TEST_FL_OFFLINE = (1 << 0), + ETH_TEST_FL_FAILED = (1 << 1), +}; +struct ethtool_test { + u32 cmd; + u32 flags; + u32 reserved; + u32 len; + u64 data[0]; +}; +#endif /* ETHTOOL_TEST */ + +#ifndef ETHTOOL_GEEPROM +#define ETHTOOL_GEEPROM 0xb +#undef ETHTOOL_GREGS +struct ethtool_eeprom { + u32 cmd; + u32 magic; + u32 offset; + u32 len; + u8 data[0]; +}; + +struct ethtool_value { + u32 cmd; + u32 data; +}; +#endif /* ETHTOOL_GEEPROM */ + +#ifndef ETHTOOL_GLINK +#define ETHTOOL_GLINK 0xa +#endif /* ETHTOOL_GLINK */ + +#ifndef ETHTOOL_GWOL +#define ETHTOOL_GWOL 0x5 +#define ETHTOOL_SWOL 0x6 +#define SOPASS_MAX 6 +struct ethtool_wolinfo { + u32 cmd; + u32 supported; + u32 wolopts; + u8 sopass[SOPASS_MAX]; /* SecureOn(tm) password */ +}; +#endif /* ETHTOOL_GWOL */ + +#ifndef ETHTOOL_GREGS +#define ETHTOOL_GREGS 0x00000004 /* Get NIC registers */ +#define ethtool_regs _kc_ethtool_regs +/* for passing big chunks of data */ +struct _kc_ethtool_regs { + u32 cmd; + u32 version; /* driver-specific, indicates different chips/revs */ + u32 len; /* bytes */ + u8 data[0]; +}; +#endif /* ETHTOOL_GREGS */ + +#ifndef ETHTOOL_GMSGLVL +#define ETHTOOL_GMSGLVL 0x00000007 /* Get driver message level */ +#endif +#ifndef ETHTOOL_SMSGLVL +#define ETHTOOL_SMSGLVL 0x00000008 /* Set driver msg level, priv. */ +#endif +#ifndef ETHTOOL_NWAY_RST +#define ETHTOOL_NWAY_RST 0x00000009 /* Restart autonegotiation, priv */ +#endif +#ifndef ETHTOOL_GLINK +#define ETHTOOL_GLINK 0x0000000a /* Get link status */ +#endif +#ifndef ETHTOOL_GEEPROM +#define ETHTOOL_GEEPROM 0x0000000b /* Get EEPROM data */ +#endif +#ifndef ETHTOOL_SEEPROM +#define ETHTOOL_SEEPROM 0x0000000c /* Set EEPROM data */ +#endif +#ifndef ETHTOOL_GCOALESCE +#define ETHTOOL_GCOALESCE 0x0000000e /* Get coalesce config */ +/* for configuring coalescing parameters of chip */ +#define ethtool_coalesce _kc_ethtool_coalesce +struct _kc_ethtool_coalesce { + u32 cmd; /* ETHTOOL_{G,S}COALESCE */ + + /* How many usecs to delay an RX interrupt after + * a packet arrives. If 0, only rx_max_coalesced_frames + * is used. + */ + u32 rx_coalesce_usecs; + + /* How many packets to delay an RX interrupt after + * a packet arrives. If 0, only rx_coalesce_usecs is + * used. It is illegal to set both usecs and max frames + * to zero as this would cause RX interrupts to never be + * generated. + */ + u32 rx_max_coalesced_frames; + + /* Same as above two parameters, except that these values + * apply while an IRQ is being serviced by the host. Not + * all cards support this feature and the values are ignored + * in that case. + */ + u32 rx_coalesce_usecs_irq; + u32 rx_max_coalesced_frames_irq; + + /* How many usecs to delay a TX interrupt after + * a packet is sent. If 0, only tx_max_coalesced_frames + * is used. + */ + u32 tx_coalesce_usecs; + + /* How many packets to delay a TX interrupt after + * a packet is sent. If 0, only tx_coalesce_usecs is + * used. It is illegal to set both usecs and max frames + * to zero as this would cause TX interrupts to never be + * generated. + */ + u32 tx_max_coalesced_frames; + + /* Same as above two parameters, except that these values + * apply while an IRQ is being serviced by the host. Not + * all cards support this feature and the values are ignored + * in that case. + */ + u32 tx_coalesce_usecs_irq; + u32 tx_max_coalesced_frames_irq; + + /* How many usecs to delay in-memory statistics + * block updates. Some drivers do not have an in-memory + * statistic block, and in such cases this value is ignored. + * This value must not be zero. + */ + u32 stats_block_coalesce_usecs; + + /* Adaptive RX/TX coalescing is an algorithm implemented by + * some drivers to improve latency under low packet rates and + * improve throughput under high packet rates. Some drivers + * only implement one of RX or TX adaptive coalescing. Anything + * not implemented by the driver causes these values to be + * silently ignored. + */ + u32 use_adaptive_rx_coalesce; + u32 use_adaptive_tx_coalesce; + + /* When the packet rate (measured in packets per second) + * is below pkt_rate_low, the {rx,tx}_*_low parameters are + * used. + */ + u32 pkt_rate_low; + u32 rx_coalesce_usecs_low; + u32 rx_max_coalesced_frames_low; + u32 tx_coalesce_usecs_low; + u32 tx_max_coalesced_frames_low; + + /* When the packet rate is below pkt_rate_high but above + * pkt_rate_low (both measured in packets per second) the + * normal {rx,tx}_* coalescing parameters are used. + */ + + /* When the packet rate is (measured in packets per second) + * is above pkt_rate_high, the {rx,tx}_*_high parameters are + * used. + */ + u32 pkt_rate_high; + u32 rx_coalesce_usecs_high; + u32 rx_max_coalesced_frames_high; + u32 tx_coalesce_usecs_high; + u32 tx_max_coalesced_frames_high; + + /* How often to do adaptive coalescing packet rate sampling, + * measured in seconds. Must not be zero. + */ + u32 rate_sample_interval; +}; +#endif /* ETHTOOL_GCOALESCE */ + +#ifndef ETHTOOL_SCOALESCE +#define ETHTOOL_SCOALESCE 0x0000000f /* Set coalesce config. */ +#endif +#ifndef ETHTOOL_GRINGPARAM +#define ETHTOOL_GRINGPARAM 0x00000010 /* Get ring parameters */ +/* for configuring RX/TX ring parameters */ +#define ethtool_ringparam _kc_ethtool_ringparam +struct _kc_ethtool_ringparam { + u32 cmd; /* ETHTOOL_{G,S}RINGPARAM */ + + /* Read only attributes. These indicate the maximum number + * of pending RX/TX ring entries the driver will allow the + * user to set. + */ + u32 rx_max_pending; + u32 rx_mini_max_pending; + u32 rx_jumbo_max_pending; + u32 tx_max_pending; + + /* Values changeable by the user. The valid values are + * in the range 1 to the "*_max_pending" counterpart above. + */ + u32 rx_pending; + u32 rx_mini_pending; + u32 rx_jumbo_pending; + u32 tx_pending; +}; +#endif /* ETHTOOL_GRINGPARAM */ + +#ifndef ETHTOOL_SRINGPARAM +#define ETHTOOL_SRINGPARAM 0x00000011 /* Set ring parameters, priv. */ +#endif +#ifndef ETHTOOL_GPAUSEPARAM +#define ETHTOOL_GPAUSEPARAM 0x00000012 /* Get pause parameters */ +/* for configuring link flow control parameters */ +#define ethtool_pauseparam _kc_ethtool_pauseparam +struct _kc_ethtool_pauseparam { + u32 cmd; /* ETHTOOL_{G,S}PAUSEPARAM */ + + /* If the link is being auto-negotiated (via ethtool_cmd.autoneg + * being true) the user may set 'autoneg' here non-zero to have the + * pause parameters be auto-negotiated too. In such a case, the + * {rx,tx}_pause values below determine what capabilities are + * advertised. + * + * If 'autoneg' is zero or the link is not being auto-negotiated, + * then {rx,tx}_pause force the driver to use/not-use pause + * flow control. + */ + u32 autoneg; + u32 rx_pause; + u32 tx_pause; +}; +#endif /* ETHTOOL_GPAUSEPARAM */ + +#ifndef ETHTOOL_SPAUSEPARAM +#define ETHTOOL_SPAUSEPARAM 0x00000013 /* Set pause parameters. */ +#endif +#ifndef ETHTOOL_GRXCSUM +#define ETHTOOL_GRXCSUM 0x00000014 /* Get RX hw csum enable (ethtool_value) */ +#endif +#ifndef ETHTOOL_SRXCSUM +#define ETHTOOL_SRXCSUM 0x00000015 /* Set RX hw csum enable (ethtool_value) */ +#endif +#ifndef ETHTOOL_GTXCSUM +#define ETHTOOL_GTXCSUM 0x00000016 /* Get TX hw csum enable (ethtool_value) */ +#endif +#ifndef ETHTOOL_STXCSUM +#define ETHTOOL_STXCSUM 0x00000017 /* Set TX hw csum enable (ethtool_value) */ +#endif +#ifndef ETHTOOL_GSG +#define ETHTOOL_GSG 0x00000018 /* Get scatter-gather enable + * (ethtool_value) */ +#endif +#ifndef ETHTOOL_SSG +#define ETHTOOL_SSG 0x00000019 /* Set scatter-gather enable + * (ethtool_value). */ +#endif +#ifndef ETHTOOL_TEST +#define ETHTOOL_TEST 0x0000001a /* execute NIC self-test, priv. */ +#endif +#ifndef ETHTOOL_GSTRINGS +#define ETHTOOL_GSTRINGS 0x0000001b /* get specified string set */ +#endif +#ifndef ETHTOOL_PHYS_ID +#define ETHTOOL_PHYS_ID 0x0000001c /* identify the NIC */ +#endif +#ifndef ETHTOOL_GSTATS +#define ETHTOOL_GSTATS 0x0000001d /* get NIC-specific statistics */ +#endif +#ifndef ETHTOOL_GTSO +#define ETHTOOL_GTSO 0x0000001e /* Get TSO enable (ethtool_value) */ +#endif +#ifndef ETHTOOL_STSO +#define ETHTOOL_STSO 0x0000001f /* Set TSO enable (ethtool_value) */ +#endif + +#ifndef ETHTOOL_BUSINFO_LEN +#define ETHTOOL_BUSINFO_LEN 32 +#endif + +#ifndef RHEL_RELEASE_CODE +/* NOTE: RHEL_RELEASE_* introduced in RHEL4.5 */ +#define RHEL_RELEASE_CODE 0 +#endif +#ifndef RHEL_RELEASE_VERSION +#define RHEL_RELEASE_VERSION(a,b) (((a) << 8) + (b)) +#endif +#ifndef AX_RELEASE_CODE +#define AX_RELEASE_CODE 0 +#endif +#ifndef AX_RELEASE_VERSION +#define AX_RELEASE_VERSION(a,b) (((a) << 8) + (b)) +#endif + +/* SuSE version macro is the same as Linux kernel version */ +#ifndef SLE_VERSION +#define SLE_VERSION(a,b,c) KERNEL_VERSION(a,b,c) +#endif +#ifndef SLE_VERSION_CODE +#ifdef CONFIG_SUSE_KERNEL +/* SLES11 GA is 2.6.27 based */ +#if ( LINUX_VERSION_CODE == KERNEL_VERSION(2,6,27) ) +#define SLE_VERSION_CODE SLE_VERSION(11,0,0) +#elif ( LINUX_VERSION_CODE == KERNEL_VERSION(2,6,32) ) +/* SLES11 SP1 is 2.6.32 based */ +#define SLE_VERSION_CODE SLE_VERSION(11,1,0) +#else +#define SLE_VERSION_CODE 0 +#endif +#else /* CONFIG_SUSE_KERNEL */ +#define SLE_VERSION_CODE 0 +#endif /* CONFIG_SUSE_KERNEL */ +#endif /* SLE_VERSION_CODE */ + +#ifdef __KLOCWORK__ +#ifdef ARRAY_SIZE +#undef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif +#endif /* __KLOCWORK__ */ + +/*****************************************************************************/ +/* 2.4.3 => 2.4.0 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,3) ) + +/**************************************/ +/* PCI DRIVER API */ + +#ifndef pci_set_dma_mask +#define pci_set_dma_mask _kc_pci_set_dma_mask +extern int _kc_pci_set_dma_mask(struct pci_dev *dev, dma_addr_t mask); +#endif + +#ifndef pci_request_regions +#define pci_request_regions _kc_pci_request_regions +extern int _kc_pci_request_regions(struct pci_dev *pdev, char *res_name); +#endif + +#ifndef pci_release_regions +#define pci_release_regions _kc_pci_release_regions +extern void _kc_pci_release_regions(struct pci_dev *pdev); +#endif + +/**************************************/ +/* NETWORK DRIVER API */ + +#ifndef alloc_etherdev +#define alloc_etherdev _kc_alloc_etherdev +extern struct net_device * _kc_alloc_etherdev(int sizeof_priv); +#endif + +#ifndef is_valid_ether_addr +#define is_valid_ether_addr _kc_is_valid_ether_addr +extern int _kc_is_valid_ether_addr(u8 *addr); +#endif + +/**************************************/ +/* MISCELLANEOUS */ + +#ifndef INIT_TQUEUE +#define INIT_TQUEUE(_tq, _routine, _data) \ + do { \ + INIT_LIST_HEAD(&(_tq)->list); \ + (_tq)->sync = 0; \ + (_tq)->routine = _routine; \ + (_tq)->data = _data; \ + } while (0) +#endif + +#endif /* 2.4.3 => 2.4.0 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,5) ) +/* Generic MII registers. */ +#define MII_BMCR 0x00 /* Basic mode control register */ +#define MII_BMSR 0x01 /* Basic mode status register */ +#define MII_PHYSID1 0x02 /* PHYS ID 1 */ +#define MII_PHYSID2 0x03 /* PHYS ID 2 */ +#define MII_ADVERTISE 0x04 /* Advertisement control reg */ +#define MII_LPA 0x05 /* Link partner ability reg */ +#define MII_EXPANSION 0x06 /* Expansion register */ +/* Basic mode control register. */ +#define BMCR_FULLDPLX 0x0100 /* Full duplex */ +#define BMCR_ANENABLE 0x1000 /* Enable auto negotiation */ +/* Basic mode status register. */ +#define BMSR_ERCAP 0x0001 /* Ext-reg capability */ +#define BMSR_ANEGCAPABLE 0x0008 /* Able to do auto-negotiation */ +#define BMSR_10HALF 0x0800 /* Can do 10mbps, half-duplex */ +#define BMSR_10FULL 0x1000 /* Can do 10mbps, full-duplex */ +#define BMSR_100HALF 0x2000 /* Can do 100mbps, half-duplex */ +#define BMSR_100FULL 0x4000 /* Can do 100mbps, full-duplex */ +/* Advertisement control register. */ +#define ADVERTISE_CSMA 0x0001 /* Only selector supported */ +#define ADVERTISE_10HALF 0x0020 /* Try for 10mbps half-duplex */ +#define ADVERTISE_10FULL 0x0040 /* Try for 10mbps full-duplex */ +#define ADVERTISE_100HALF 0x0080 /* Try for 100mbps half-duplex */ +#define ADVERTISE_100FULL 0x0100 /* Try for 100mbps full-duplex */ +#define ADVERTISE_ALL (ADVERTISE_10HALF | ADVERTISE_10FULL | \ + ADVERTISE_100HALF | ADVERTISE_100FULL) +/* Expansion register for auto-negotiation. */ +#define EXPANSION_ENABLENPAGE 0x0004 /* This enables npage words */ +#endif + +/*****************************************************************************/ +/* 2.4.6 => 2.4.3 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,6) ) + +#ifndef pci_set_power_state +#define pci_set_power_state _kc_pci_set_power_state +extern int _kc_pci_set_power_state(struct pci_dev *dev, int state); +#endif + +#ifndef pci_enable_wake +#define pci_enable_wake _kc_pci_enable_wake +extern int _kc_pci_enable_wake(struct pci_dev *pdev, u32 state, int enable); +#endif + +#ifndef pci_disable_device +#define pci_disable_device _kc_pci_disable_device +extern void _kc_pci_disable_device(struct pci_dev *pdev); +#endif + +/* PCI PM entry point syntax changed, so don't support suspend/resume */ +#undef CONFIG_PM + +#endif /* 2.4.6 => 2.4.3 */ + +#ifndef HAVE_PCI_SET_MWI +#define pci_set_mwi(X) pci_write_config_word(X, \ + PCI_COMMAND, adapter->hw.bus.pci_cmd_word | \ + PCI_COMMAND_INVALIDATE); +#define pci_clear_mwi(X) pci_write_config_word(X, \ + PCI_COMMAND, adapter->hw.bus.pci_cmd_word & \ + ~PCI_COMMAND_INVALIDATE); +#endif + +/*****************************************************************************/ +/* 2.4.10 => 2.4.9 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,10) ) + +/**************************************/ +/* MODULE API */ + +#ifndef MODULE_LICENSE + #define MODULE_LICENSE(X) +#endif + +/**************************************/ +/* OTHER */ + +#undef min +#define min(x,y) ({ \ + const typeof(x) _x = (x); \ + const typeof(y) _y = (y); \ + (void) (&_x == &_y); \ + _x < _y ? _x : _y; }) + +#undef max +#define max(x,y) ({ \ + const typeof(x) _x = (x); \ + const typeof(y) _y = (y); \ + (void) (&_x == &_y); \ + _x > _y ? _x : _y; }) + +#define min_t(type,x,y) ({ \ + type _x = (x); \ + type _y = (y); \ + _x < _y ? _x : _y; }) + +#define max_t(type,x,y) ({ \ + type _x = (x); \ + type _y = (y); \ + _x > _y ? _x : _y; }) + +#ifndef list_for_each_safe +#define list_for_each_safe(pos, n, head) \ + for (pos = (head)->next, n = pos->next; pos != (head); \ + pos = n, n = pos->next) +#endif + +#ifndef ____cacheline_aligned_in_smp +#ifdef CONFIG_SMP +#define ____cacheline_aligned_in_smp ____cacheline_aligned +#else +#define ____cacheline_aligned_in_smp +#endif /* CONFIG_SMP */ +#endif + +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,8) ) +extern int _kc_snprintf(char * buf, size_t size, const char *fmt, ...); +#define snprintf(buf, size, fmt, args...) _kc_snprintf(buf, size, fmt, ##args) +extern int _kc_vsnprintf(char *buf, size_t size, const char *fmt, va_list args); +#define vsnprintf(buf, size, fmt, args) _kc_vsnprintf(buf, size, fmt, args) +#else /* 2.4.8 => 2.4.9 */ +extern int snprintf(char * buf, size_t size, const char *fmt, ...); +extern int vsnprintf(char *buf, size_t size, const char *fmt, va_list args); +#endif +#endif /* 2.4.10 -> 2.4.6 */ + + +/*****************************************************************************/ +/* 2.4.12 => 2.4.10 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,12) ) +#ifndef HAVE_NETIF_MSG +#define HAVE_NETIF_MSG 1 +enum { + NETIF_MSG_DRV = 0x0001, + NETIF_MSG_PROBE = 0x0002, + NETIF_MSG_LINK = 0x0004, + NETIF_MSG_TIMER = 0x0008, + NETIF_MSG_IFDOWN = 0x0010, + NETIF_MSG_IFUP = 0x0020, + NETIF_MSG_RX_ERR = 0x0040, + NETIF_MSG_TX_ERR = 0x0080, + NETIF_MSG_TX_QUEUED = 0x0100, + NETIF_MSG_INTR = 0x0200, + NETIF_MSG_TX_DONE = 0x0400, + NETIF_MSG_RX_STATUS = 0x0800, + NETIF_MSG_PKTDATA = 0x1000, + NETIF_MSG_HW = 0x2000, + NETIF_MSG_WOL = 0x4000, +}; + +#define netif_msg_drv(p) ((p)->msg_enable & NETIF_MSG_DRV) +#define netif_msg_probe(p) ((p)->msg_enable & NETIF_MSG_PROBE) +#define netif_msg_link(p) ((p)->msg_enable & NETIF_MSG_LINK) +#define netif_msg_timer(p) ((p)->msg_enable & NETIF_MSG_TIMER) +#define netif_msg_ifdown(p) ((p)->msg_enable & NETIF_MSG_IFDOWN) +#define netif_msg_ifup(p) ((p)->msg_enable & NETIF_MSG_IFUP) +#define netif_msg_rx_err(p) ((p)->msg_enable & NETIF_MSG_RX_ERR) +#define netif_msg_tx_err(p) ((p)->msg_enable & NETIF_MSG_TX_ERR) +#define netif_msg_tx_queued(p) ((p)->msg_enable & NETIF_MSG_TX_QUEUED) +#define netif_msg_intr(p) ((p)->msg_enable & NETIF_MSG_INTR) +#define netif_msg_tx_done(p) ((p)->msg_enable & NETIF_MSG_TX_DONE) +#define netif_msg_rx_status(p) ((p)->msg_enable & NETIF_MSG_RX_STATUS) +#define netif_msg_pktdata(p) ((p)->msg_enable & NETIF_MSG_PKTDATA) +#endif /* !HAVE_NETIF_MSG */ +#endif /* 2.4.12 => 2.4.10 */ + +/*****************************************************************************/ +/* 2.4.13 => 2.4.12 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,13) ) + +/**************************************/ +/* PCI DMA MAPPING */ + +#ifndef virt_to_page + #define virt_to_page(v) (mem_map + (virt_to_phys(v) >> PAGE_SHIFT)) +#endif + +#ifndef pci_map_page +#define pci_map_page _kc_pci_map_page +extern u64 _kc_pci_map_page(struct pci_dev *dev, struct page *page, unsigned long offset, size_t size, int direction); +#endif + +#ifndef pci_unmap_page +#define pci_unmap_page _kc_pci_unmap_page +extern void _kc_pci_unmap_page(struct pci_dev *dev, u64 dma_addr, size_t size, int direction); +#endif + +/* pci_set_dma_mask takes dma_addr_t, which is only 32-bits prior to 2.4.13 */ + +#undef DMA_32BIT_MASK +#define DMA_32BIT_MASK 0xffffffff +#undef DMA_64BIT_MASK +#define DMA_64BIT_MASK 0xffffffff + +/**************************************/ +/* OTHER */ + +#ifndef cpu_relax +#define cpu_relax() rep_nop() +#endif + +struct vlan_ethhdr { + unsigned char h_dest[ETH_ALEN]; + unsigned char h_source[ETH_ALEN]; + unsigned short h_vlan_proto; + unsigned short h_vlan_TCI; + unsigned short h_vlan_encapsulated_proto; +}; +#endif /* 2.4.13 => 2.4.12 */ + +/*****************************************************************************/ +/* 2.4.17 => 2.4.12 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,17) ) + +#ifndef __devexit_p + #define __devexit_p(x) &(x) +#endif + +#endif /* 2.4.17 => 2.4.13 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,18) ) +#define NETIF_MSG_HW 0x2000 +#define NETIF_MSG_WOL 0x4000 + +#ifndef netif_msg_hw +#define netif_msg_hw(p) ((p)->msg_enable & NETIF_MSG_HW) +#endif +#ifndef netif_msg_wol +#define netif_msg_wol(p) ((p)->msg_enable & NETIF_MSG_WOL) +#endif +#endif /* 2.4.18 */ + +/*****************************************************************************/ + +/*****************************************************************************/ +/* 2.4.20 => 2.4.19 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,20) ) + +/* we won't support NAPI on less than 2.4.20 */ +#ifdef NAPI +#undef NAPI +#undef CONFIG_IXGBE_NAPI +#endif + +#endif /* 2.4.20 => 2.4.19 */ + +/*****************************************************************************/ +/* 2.4.22 => 2.4.17 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,22) ) +#define pci_name(x) ((x)->slot_name) +#endif + +/*****************************************************************************/ +/* 2.4.22 => 2.4.17 */ + +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,22) ) +#ifndef IXGBE_NO_LRO +/* Don't enable LRO for these legacy kernels */ +#define IXGBE_NO_LRO +#endif +#endif + +/*****************************************************************************/ +/*****************************************************************************/ +/* 2.4.23 => 2.4.22 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,23) ) +/*****************************************************************************/ +#ifdef NAPI +#ifndef netif_poll_disable +#define netif_poll_disable(x) _kc_netif_poll_disable(x) +static inline void _kc_netif_poll_disable(struct net_device *netdev) +{ + while (test_and_set_bit(__LINK_STATE_RX_SCHED, &netdev->state)) { + /* No hurry */ + current->state = TASK_INTERRUPTIBLE; + schedule_timeout(1); + } +} +#endif +#ifndef netif_poll_enable +#define netif_poll_enable(x) _kc_netif_poll_enable(x) +static inline void _kc_netif_poll_enable(struct net_device *netdev) +{ + clear_bit(__LINK_STATE_RX_SCHED, &netdev->state); +} +#endif +#endif /* NAPI */ +#ifndef netif_tx_disable +#define netif_tx_disable(x) _kc_netif_tx_disable(x) +static inline void _kc_netif_tx_disable(struct net_device *dev) +{ + spin_lock_bh(&dev->xmit_lock); + netif_stop_queue(dev); + spin_unlock_bh(&dev->xmit_lock); +} +#endif +#else /* 2.4.23 => 2.4.22 */ +#define HAVE_SCTP +#endif /* 2.4.23 => 2.4.22 */ + +/*****************************************************************************/ +/* 2.6.4 => 2.6.0 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,25) || \ + ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) && \ + LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) ) ) +#define ETHTOOL_OPS_COMPAT +#endif /* 2.6.4 => 2.6.0 */ + +/*****************************************************************************/ +/* 2.5.71 => 2.4.x */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,5,71) ) +#define sk_protocol protocol +#define pci_get_device pci_find_device +#endif /* 2.5.70 => 2.4.x */ + +/*****************************************************************************/ +/* < 2.4.27 or 2.6.0 <= 2.6.5 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,27) || \ + ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) && \ + LINUX_VERSION_CODE < KERNEL_VERSION(2,6,5) ) ) + +#ifndef netif_msg_init +#define netif_msg_init _kc_netif_msg_init +static inline u32 _kc_netif_msg_init(int debug_value, int default_msg_enable_bits) +{ + /* use default */ + if (debug_value < 0 || debug_value >= (sizeof(u32) * 8)) + return default_msg_enable_bits; + if (debug_value == 0) /* no output */ + return 0; + /* set low N bits */ + return (1 << debug_value) -1; +} +#endif + +#endif /* < 2.4.27 or 2.6.0 <= 2.6.5 */ +/*****************************************************************************/ +#if (( LINUX_VERSION_CODE < KERNEL_VERSION(2,4,27) ) || \ + (( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) ) && \ + ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,3) ))) +#define netdev_priv(x) x->priv +#endif + +/*****************************************************************************/ +/* <= 2.5.0 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,5,0) ) +#include +#undef pci_register_driver +#define pci_register_driver pci_module_init + +/* + * Most of the dma compat code is copied/modifed from the 2.4.37 + * /include/linux/libata-compat.h header file + */ +/* These definitions mirror those in pci.h, so they can be used + * interchangeably with their PCI_ counterparts */ +enum dma_data_direction { + DMA_BIDIRECTIONAL = 0, + DMA_TO_DEVICE = 1, + DMA_FROM_DEVICE = 2, + DMA_NONE = 3, +}; + +struct device { + struct pci_dev pdev; +}; + +static inline struct pci_dev *to_pci_dev (struct device *dev) +{ + return (struct pci_dev *) dev; +} +static inline struct device *pci_dev_to_dev(struct pci_dev *pdev) +{ + return (struct device *) pdev; +} + +#define pdev_printk(lvl, pdev, fmt, args...) \ + printk("%s %s: " fmt, lvl, pci_name(pdev), ## args) +#define dev_err(dev, fmt, args...) \ + pdev_printk(KERN_ERR, to_pci_dev(dev), fmt, ## args) +#define dev_info(dev, fmt, args...) \ + pdev_printk(KERN_INFO, to_pci_dev(dev), fmt, ## args) +#define dev_warn(dev, fmt, args...) \ + pdev_printk(KERN_WARNING, to_pci_dev(dev), fmt, ## args) + +/* NOTE: dangerous! we ignore the 'gfp' argument */ +#define dma_alloc_coherent(dev,sz,dma,gfp) \ + pci_alloc_consistent(to_pci_dev(dev),(sz),(dma)) +#define dma_free_coherent(dev,sz,addr,dma_addr) \ + pci_free_consistent(to_pci_dev(dev),(sz),(addr),(dma_addr)) + +#define dma_map_page(dev,a,b,c,d) \ + pci_map_page(to_pci_dev(dev),(a),(b),(c),(d)) +#define dma_unmap_page(dev,a,b,c) \ + pci_unmap_page(to_pci_dev(dev),(a),(b),(c)) + +#define dma_map_single(dev,a,b,c) \ + pci_map_single(to_pci_dev(dev),(a),(b),(c)) +#define dma_unmap_single(dev,a,b,c) \ + pci_unmap_single(to_pci_dev(dev),(a),(b),(c)) + +#define dma_sync_single(dev,a,b,c) \ + pci_dma_sync_single(to_pci_dev(dev),(a),(b),(c)) + +/* for range just sync everything, that's all the pci API can do */ +#define dma_sync_single_range(dev,addr,off,sz,dir) \ + pci_dma_sync_single(to_pci_dev(dev),(addr),(off)+(sz),(dir)) + +#define dma_set_mask(dev,mask) \ + pci_set_dma_mask(to_pci_dev(dev),(mask)) + +/* hlist_* code - double linked lists */ +struct hlist_head { + struct hlist_node *first; +}; + +struct hlist_node { + struct hlist_node *next, **pprev; +}; + +static inline void __hlist_del(struct hlist_node *n) +{ + struct hlist_node *next = n->next; + struct hlist_node **pprev = n->pprev; + *pprev = next; + if (next) + next->pprev = pprev; +} + +static inline void hlist_del(struct hlist_node *n) +{ + __hlist_del(n); + n->next = NULL; + n->pprev = NULL; +} + +static inline void hlist_add_head(struct hlist_node *n, struct hlist_head *h) +{ + struct hlist_node *first = h->first; + n->next = first; + if (first) + first->pprev = &n->next; + h->first = n; + n->pprev = &h->first; +} + +static inline int hlist_empty(const struct hlist_head *h) +{ + return !h->first; +} +#define HLIST_HEAD_INIT { .first = NULL } +#define HLIST_HEAD(name) struct hlist_head name = { .first = NULL } +#define INIT_HLIST_HEAD(ptr) ((ptr)->first = NULL) +static inline void INIT_HLIST_NODE(struct hlist_node *h) +{ + h->next = NULL; + h->pprev = NULL; +} +#define hlist_entry(ptr, type, member) container_of(ptr,type,member) + +#define hlist_for_each_entry(tpos, pos, head, member) \ + for (pos = (head)->first; \ + pos && ({ prefetch(pos->next); 1;}) && \ + ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ + pos = pos->next) + +#define hlist_for_each_entry_safe(tpos, pos, n, head, member) \ + for (pos = (head)->first; \ + pos && ({ n = pos->next; 1; }) && \ + ({ tpos = hlist_entry(pos, typeof(*tpos), member); 1;}); \ + pos = n) + +#ifndef might_sleep +#define might_sleep() +#endif +#else +static inline struct device *pci_dev_to_dev(struct pci_dev *pdev) +{ + return &pdev->dev; +} +#endif /* <= 2.5.0 */ + +/*****************************************************************************/ +/* 2.5.28 => 2.4.23 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,5,28) ) + +static inline void _kc_synchronize_irq(void) +{ + synchronize_irq(); +} +#undef synchronize_irq +#define synchronize_irq(X) _kc_synchronize_irq() + +#include +#define work_struct tq_struct +#undef INIT_WORK +#define INIT_WORK(a,b) INIT_TQUEUE(a,(void (*)(void *))b,a) +#undef container_of +#define container_of list_entry +#define schedule_work schedule_task +#define flush_scheduled_work flush_scheduled_tasks +#define cancel_work_sync(x) flush_scheduled_work() + +#endif /* 2.5.28 => 2.4.17 */ + +/*****************************************************************************/ +/* 2.6.0 => 2.5.28 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) ) +#undef get_cpu +#define get_cpu() smp_processor_id() +#undef put_cpu +#define put_cpu() do { } while(0) +#define MODULE_INFO(version, _version) +#ifndef CONFIG_E1000_DISABLE_PACKET_SPLIT +#define CONFIG_E1000_DISABLE_PACKET_SPLIT 1 +#endif +#define CONFIG_IGB_DISABLE_PACKET_SPLIT 1 + +#define dma_set_coherent_mask(dev,mask) 1 + +#undef dev_put +#define dev_put(dev) __dev_put(dev) + +#ifndef skb_fill_page_desc +#define skb_fill_page_desc _kc_skb_fill_page_desc +extern void _kc_skb_fill_page_desc(struct sk_buff *skb, int i, struct page *page, int off, int size); +#endif + +#undef ALIGN +#define ALIGN(x,a) (((x)+(a)-1)&~((a)-1)) + +#ifndef page_count +#define page_count(p) atomic_read(&(p)->count) +#endif + +#ifdef MAX_NUMNODES +#undef MAX_NUMNODES +#endif +#define MAX_NUMNODES 1 + +/* find_first_bit and find_next bit are not defined for most + * 2.4 kernels (except for the redhat 2.4.21 kernels + */ +#include +#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) +#undef find_next_bit +#define find_next_bit _kc_find_next_bit +extern unsigned long _kc_find_next_bit(const unsigned long *addr, + unsigned long size, + unsigned long offset); +#define find_first_bit(addr, size) find_next_bit((addr), (size), 0) + + +#ifndef netdev_name +static inline const char *_kc_netdev_name(const struct net_device *dev) +{ + if (strchr(dev->name, '%')) + return "(unregistered net_device)"; + return dev->name; +} +#define netdev_name(netdev) _kc_netdev_name(netdev) +#endif /* netdev_name */ + +#ifndef strlcpy +#define strlcpy _kc_strlcpy +extern size_t _kc_strlcpy(char *dest, const char *src, size_t size); +#endif /* strlcpy */ + +#endif /* 2.6.0 => 2.5.28 */ + +/*****************************************************************************/ +/* 2.6.4 => 2.6.0 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) ) +#define MODULE_VERSION(_version) MODULE_INFO(version, _version) +#endif /* 2.6.4 => 2.6.0 */ + +/*****************************************************************************/ +/* 2.6.5 => 2.6.0 */ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,5) ) +#define dma_sync_single_for_cpu dma_sync_single +#define dma_sync_single_for_device dma_sync_single +#define dma_sync_single_range_for_cpu dma_sync_single_range +#define dma_sync_single_range_for_device dma_sync_single_range +#ifndef pci_dma_mapping_error +#define pci_dma_mapping_error _kc_pci_dma_mapping_error +static inline int _kc_pci_dma_mapping_error(dma_addr_t dma_addr) +{ + return dma_addr == 0; +} +#endif +#endif /* 2.6.5 => 2.6.0 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,4) ) +extern int _kc_scnprintf(char * buf, size_t size, const char *fmt, ...); +#define scnprintf(buf, size, fmt, args...) _kc_scnprintf(buf, size, fmt, ##args) +#endif /* < 2.6.4 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,6) ) +/* taken from 2.6 include/linux/bitmap.h */ +#undef bitmap_zero +#define bitmap_zero _kc_bitmap_zero +static inline void _kc_bitmap_zero(unsigned long *dst, int nbits) +{ + if (nbits <= BITS_PER_LONG) + *dst = 0UL; + else { + int len = BITS_TO_LONGS(nbits) * sizeof(unsigned long); + memset(dst, 0, len); + } +} +#define random_ether_addr _kc_random_ether_addr +static inline void _kc_random_ether_addr(u8 *addr) +{ + get_random_bytes(addr, ETH_ALEN); + addr[0] &= 0xfe; /* clear multicast */ + addr[0] |= 0x02; /* set local assignment */ +} +#define page_to_nid(x) 0 + +#endif /* < 2.6.6 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,7) ) +#undef if_mii +#define if_mii _kc_if_mii +static inline struct mii_ioctl_data *_kc_if_mii(struct ifreq *rq) +{ + return (struct mii_ioctl_data *) &rq->ifr_ifru; +} + +#ifndef __force +#define __force +#endif +#endif /* < 2.6.7 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,8) ) +#ifndef PCI_EXP_DEVCTL +#define PCI_EXP_DEVCTL 8 +#endif +#ifndef PCI_EXP_DEVCTL_CERE +#define PCI_EXP_DEVCTL_CERE 0x0001 +#endif +#define msleep(x) do { set_current_state(TASK_UNINTERRUPTIBLE); \ + schedule_timeout((x * HZ)/1000 + 2); \ + } while (0) + +#endif /* < 2.6.8 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,9)) +#include +#define __iomem + +#ifndef kcalloc +#define kcalloc(n, size, flags) _kc_kzalloc(((n) * (size)), flags) +extern void *_kc_kzalloc(size_t size, int flags); +#endif +#define MSEC_PER_SEC 1000L +static inline unsigned int _kc_jiffies_to_msecs(const unsigned long j) +{ +#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) + return (MSEC_PER_SEC / HZ) * j; +#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC) + return (j + (HZ / MSEC_PER_SEC) - 1)/(HZ / MSEC_PER_SEC); +#else + return (j * MSEC_PER_SEC) / HZ; +#endif +} +static inline unsigned long _kc_msecs_to_jiffies(const unsigned int m) +{ + if (m > _kc_jiffies_to_msecs(MAX_JIFFY_OFFSET)) + return MAX_JIFFY_OFFSET; +#if HZ <= MSEC_PER_SEC && !(MSEC_PER_SEC % HZ) + return (m + (MSEC_PER_SEC / HZ) - 1) / (MSEC_PER_SEC / HZ); +#elif HZ > MSEC_PER_SEC && !(HZ % MSEC_PER_SEC) + return m * (HZ / MSEC_PER_SEC); +#else + return (m * HZ + MSEC_PER_SEC - 1) / MSEC_PER_SEC; +#endif +} + +#define msleep_interruptible _kc_msleep_interruptible +static inline unsigned long _kc_msleep_interruptible(unsigned int msecs) +{ + unsigned long timeout = _kc_msecs_to_jiffies(msecs) + 1; + + while (timeout && !signal_pending(current)) { + __set_current_state(TASK_INTERRUPTIBLE); + timeout = schedule_timeout(timeout); + } + return _kc_jiffies_to_msecs(timeout); +} + +/* Basic mode control register. */ +#define BMCR_SPEED1000 0x0040 /* MSB of Speed (1000) */ + +#ifndef __le16 +#define __le16 u16 +#endif +#ifndef __le32 +#define __le32 u32 +#endif +#ifndef __le64 +#define __le64 u64 +#endif +#ifndef __be16 +#define __be16 u16 +#endif +#ifndef __be32 +#define __be32 u32 +#endif +#ifndef __be64 +#define __be64 u64 +#endif + +static inline struct vlan_ethhdr *vlan_eth_hdr(const struct sk_buff *skb) +{ + return (struct vlan_ethhdr *)skb->mac.raw; +} + +/* Wake-On-Lan options. */ +#define WAKE_PHY (1 << 0) +#define WAKE_UCAST (1 << 1) +#define WAKE_MCAST (1 << 2) +#define WAKE_BCAST (1 << 3) +#define WAKE_ARP (1 << 4) +#define WAKE_MAGIC (1 << 5) +#define WAKE_MAGICSECURE (1 << 6) /* only meaningful if WAKE_MAGIC */ + +#define skb_header_pointer _kc_skb_header_pointer +static inline void *_kc_skb_header_pointer(const struct sk_buff *skb, + int offset, int len, void *buffer) +{ + int hlen = skb_headlen(skb); + + if (hlen - offset >= len) + return skb->data + offset; + +#ifdef MAX_SKB_FRAGS + if (skb_copy_bits(skb, offset, buffer, len) < 0) + return NULL; + + return buffer; +#else + return NULL; +#endif + +#ifndef NETDEV_TX_OK +#define NETDEV_TX_OK 0 +#endif +#ifndef NETDEV_TX_BUSY +#define NETDEV_TX_BUSY 1 +#endif +#ifndef NETDEV_TX_LOCKED +#define NETDEV_TX_LOCKED -1 +#endif +} + +#ifndef __bitwise +#define __bitwise +#endif +#endif /* < 2.6.9 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,10) ) +#ifdef module_param_array_named +#undef module_param_array_named +#define module_param_array_named(name, array, type, nump, perm) \ + static struct kparam_array __param_arr_##name \ + = { ARRAY_SIZE(array), nump, param_set_##type, param_get_##type, \ + sizeof(array[0]), array }; \ + module_param_call(name, param_array_set, param_array_get, \ + &__param_arr_##name, perm) +#endif /* module_param_array_named */ +/* + * num_online is broken for all < 2.6.10 kernels. This is needed to support + * Node module parameter of ixgbe. + */ +#undef num_online_nodes +#define num_online_nodes(n) 1 +extern DECLARE_BITMAP(_kcompat_node_online_map, MAX_NUMNODES); +#undef node_online_map +#define node_online_map _kcompat_node_online_map +#endif /* < 2.6.10 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,11) ) +#define PCI_D0 0 +#define PCI_D1 1 +#define PCI_D2 2 +#define PCI_D3hot 3 +#define PCI_D3cold 4 +typedef int pci_power_t; +#define pci_choose_state(pdev,state) state +#define PMSG_SUSPEND 3 +#define PCI_EXP_LNKCTL 16 + +#undef NETIF_F_LLTX + +#ifndef ARCH_HAS_PREFETCH +#define prefetch(X) +#endif + +#ifndef NET_IP_ALIGN +#define NET_IP_ALIGN 2 +#endif + +#define KC_USEC_PER_SEC 1000000L +#define usecs_to_jiffies _kc_usecs_to_jiffies +static inline unsigned int _kc_jiffies_to_usecs(const unsigned long j) +{ +#if HZ <= KC_USEC_PER_SEC && !(KC_USEC_PER_SEC % HZ) + return (KC_USEC_PER_SEC / HZ) * j; +#elif HZ > KC_USEC_PER_SEC && !(HZ % KC_USEC_PER_SEC) + return (j + (HZ / KC_USEC_PER_SEC) - 1)/(HZ / KC_USEC_PER_SEC); +#else + return (j * KC_USEC_PER_SEC) / HZ; +#endif +} +static inline unsigned long _kc_usecs_to_jiffies(const unsigned int m) +{ + if (m > _kc_jiffies_to_usecs(MAX_JIFFY_OFFSET)) + return MAX_JIFFY_OFFSET; +#if HZ <= KC_USEC_PER_SEC && !(KC_USEC_PER_SEC % HZ) + return (m + (KC_USEC_PER_SEC / HZ) - 1) / (KC_USEC_PER_SEC / HZ); +#elif HZ > KC_USEC_PER_SEC && !(HZ % KC_USEC_PER_SEC) + return m * (HZ / KC_USEC_PER_SEC); +#else + return (m * HZ + KC_USEC_PER_SEC - 1) / KC_USEC_PER_SEC; +#endif +} +#endif /* < 2.6.11 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,12) ) +#include +#define USE_REBOOT_NOTIFIER + +/* Generic MII registers. */ +#define MII_CTRL1000 0x09 /* 1000BASE-T control */ +#define MII_STAT1000 0x0a /* 1000BASE-T status */ +/* Advertisement control register. */ +#define ADVERTISE_PAUSE_CAP 0x0400 /* Try for pause */ +#define ADVERTISE_PAUSE_ASYM 0x0800 /* Try for asymmetric pause */ +/* 1000BASE-T Control register */ +#define ADVERTISE_1000FULL 0x0200 /* Advertise 1000BASE-T full duplex */ +#ifndef is_zero_ether_addr +#define is_zero_ether_addr _kc_is_zero_ether_addr +static inline int _kc_is_zero_ether_addr(const u8 *addr) +{ + return !(addr[0] | addr[1] | addr[2] | addr[3] | addr[4] | addr[5]); +} +#endif /* is_zero_ether_addr */ +#ifndef is_multicast_ether_addr +#define is_multicast_ether_addr _kc_is_multicast_ether_addr +static inline int _kc_is_multicast_ether_addr(const u8 *addr) +{ + return addr[0] & 0x01; +} +#endif /* is_multicast_ether_addr */ +#endif /* < 2.6.12 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,13) ) +#ifndef kstrdup +#define kstrdup _kc_kstrdup +extern char *_kc_kstrdup(const char *s, unsigned int gfp); +#endif +#endif /* < 2.6.13 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,14) ) +#define pm_message_t u32 +#ifndef kzalloc +#define kzalloc _kc_kzalloc +extern void *_kc_kzalloc(size_t size, int flags); +#endif + +/* Generic MII registers. */ +#define MII_ESTATUS 0x0f /* Extended Status */ +/* Basic mode status register. */ +#define BMSR_ESTATEN 0x0100 /* Extended Status in R15 */ +/* Extended status register. */ +#define ESTATUS_1000_TFULL 0x2000 /* Can do 1000BT Full */ +#define ESTATUS_1000_THALF 0x1000 /* Can do 1000BT Half */ + +#define ADVERTISED_Pause (1 << 13) +#define ADVERTISED_Asym_Pause (1 << 14) + +#if (!(RHEL_RELEASE_CODE && \ + (RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(4,3)) && \ + (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0)))) +#if ((LINUX_VERSION_CODE == KERNEL_VERSION(2,6,9)) && !defined(gfp_t)) +#define gfp_t unsigned +#else +typedef unsigned gfp_t; +#endif +#endif /* !RHEL4.3->RHEL5.0 */ + +#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,9) ) +#ifdef CONFIG_X86_64 +#define dma_sync_single_range_for_cpu(dev, dma_handle, offset, size, dir) \ + dma_sync_single_for_cpu(dev, dma_handle, size, dir) +#define dma_sync_single_range_for_device(dev, dma_handle, offset, size, dir) \ + dma_sync_single_for_device(dev, dma_handle, size, dir) +#endif +#endif +#endif /* < 2.6.14 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,15) ) +#ifndef vmalloc_node +#define vmalloc_node(a,b) vmalloc(a) +#endif /* vmalloc_node*/ + +#define setup_timer(_timer, _function, _data) \ +do { \ + (_timer)->function = _function; \ + (_timer)->data = _data; \ + init_timer(_timer); \ +} while (0) +#ifndef device_can_wakeup +#define device_can_wakeup(dev) (1) +#endif +#ifndef device_set_wakeup_enable +#define device_set_wakeup_enable(dev, val) do{}while(0) +#endif +#ifndef device_init_wakeup +#define device_init_wakeup(dev,val) do {} while (0) +#endif +static inline unsigned _kc_compare_ether_addr(const u8 *addr1, const u8 *addr2) +{ + const u16 *a = (const u16 *) addr1; + const u16 *b = (const u16 *) addr2; + + return ((a[0] ^ b[0]) | (a[1] ^ b[1]) | (a[2] ^ b[2])) != 0; +} +#undef compare_ether_addr +#define compare_ether_addr(addr1, addr2) _kc_compare_ether_addr(addr1, addr2) +#endif /* < 2.6.15 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,16) ) +#undef DEFINE_MUTEX +#define DEFINE_MUTEX(x) DECLARE_MUTEX(x) +#define mutex_lock(x) down_interruptible(x) +#define mutex_unlock(x) up(x) + +#ifndef ____cacheline_internodealigned_in_smp +#ifdef CONFIG_SMP +#define ____cacheline_internodealigned_in_smp ____cacheline_aligned_in_smp +#else +#define ____cacheline_internodealigned_in_smp +#endif /* CONFIG_SMP */ +#endif /* ____cacheline_internodealigned_in_smp */ +#undef HAVE_PCI_ERS +#else /* 2.6.16 and above */ +#undef HAVE_PCI_ERS +#define HAVE_PCI_ERS +#endif /* < 2.6.16 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,17) ) +#ifndef first_online_node +#define first_online_node 0 +#endif +#ifndef NET_SKB_PAD +#define NET_SKB_PAD 16 +#endif +#endif /* < 2.6.17 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,18) ) + +#ifndef IRQ_HANDLED +#define irqreturn_t void +#define IRQ_HANDLED +#define IRQ_NONE +#endif + +#ifndef IRQF_PROBE_SHARED +#ifdef SA_PROBEIRQ +#define IRQF_PROBE_SHARED SA_PROBEIRQ +#else +#define IRQF_PROBE_SHARED 0 +#endif +#endif + +#ifndef IRQF_SHARED +#define IRQF_SHARED SA_SHIRQ +#endif + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0])) +#endif + +#ifndef FIELD_SIZEOF +#define FIELD_SIZEOF(t, f) (sizeof(((t*)0)->f)) +#endif + +#ifndef skb_is_gso +#ifdef NETIF_F_TSO +#define skb_is_gso _kc_skb_is_gso +static inline int _kc_skb_is_gso(const struct sk_buff *skb) +{ + return skb_shinfo(skb)->gso_size; +} +#else +#define skb_is_gso(a) 0 +#endif +#endif + +#ifndef resource_size_t +#define resource_size_t unsigned long +#endif + +#ifdef skb_pad +#undef skb_pad +#endif +#define skb_pad(x,y) _kc_skb_pad(x, y) +int _kc_skb_pad(struct sk_buff *skb, int pad); +#ifdef skb_padto +#undef skb_padto +#endif +#define skb_padto(x,y) _kc_skb_padto(x, y) +static inline int _kc_skb_padto(struct sk_buff *skb, unsigned int len) +{ + unsigned int size = skb->len; + if(likely(size >= len)) + return 0; + return _kc_skb_pad(skb, len - size); +} + +#ifndef DECLARE_PCI_UNMAP_ADDR +#define DECLARE_PCI_UNMAP_ADDR(ADDR_NAME) \ + dma_addr_t ADDR_NAME +#define DECLARE_PCI_UNMAP_LEN(LEN_NAME) \ + u32 LEN_NAME +#define pci_unmap_addr(PTR, ADDR_NAME) \ + ((PTR)->ADDR_NAME) +#define pci_unmap_addr_set(PTR, ADDR_NAME, VAL) \ + (((PTR)->ADDR_NAME) = (VAL)) +#define pci_unmap_len(PTR, LEN_NAME) \ + ((PTR)->LEN_NAME) +#define pci_unmap_len_set(PTR, LEN_NAME, VAL) \ + (((PTR)->LEN_NAME) = (VAL)) +#endif /* DECLARE_PCI_UNMAP_ADDR */ +#endif /* < 2.6.18 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,19) ) + +#ifndef DIV_ROUND_UP +#define DIV_ROUND_UP(n,d) (((n) + (d) - 1) / (d)) +#endif +#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,0) ) +#if (!((RHEL_RELEASE_CODE && \ + ((RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(4,4) && \ + RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0)) || \ + (RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,0)))) || \ + (AX_RELEASE_CODE && AX_RELEASE_CODE > AX_RELEASE_VERSION(3,0)))) +typedef irqreturn_t (*irq_handler_t)(int, void*, struct pt_regs *); +#endif +#if (RHEL_RELEASE_CODE && RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(6,0)) +#undef CONFIG_INET_LRO +#undef CONFIG_INET_LRO_MODULE +#undef CONFIG_FCOE +#undef CONFIG_FCOE_MODULE +#endif +typedef irqreturn_t (*new_handler_t)(int, void*); +static inline irqreturn_t _kc_request_irq(unsigned int irq, new_handler_t handler, unsigned long flags, const char *devname, void *dev_id) +#else /* 2.4.x */ +typedef void (*irq_handler_t)(int, void*, struct pt_regs *); +typedef void (*new_handler_t)(int, void*); +static inline int _kc_request_irq(unsigned int irq, new_handler_t handler, unsigned long flags, const char *devname, void *dev_id) +#endif /* >= 2.5.x */ +{ + irq_handler_t new_handler = (irq_handler_t) handler; + return request_irq(irq, new_handler, flags, devname, dev_id); +} + +#undef request_irq +#define request_irq(irq, handler, flags, devname, dev_id) _kc_request_irq((irq), (handler), (flags), (devname), (dev_id)) + +#define irq_handler_t new_handler_t +/* pci_restore_state and pci_save_state handles MSI/PCIE from 2.6.19 */ +#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,4))) +#define PCIE_CONFIG_SPACE_LEN 256 +#define PCI_CONFIG_SPACE_LEN 64 +#define PCIE_LINK_STATUS 0x12 +#define pci_config_space_ich8lan() do {} while(0) +#undef pci_save_state +extern int _kc_pci_save_state(struct pci_dev *); +#define pci_save_state(pdev) _kc_pci_save_state(pdev) +#undef pci_restore_state +extern void _kc_pci_restore_state(struct pci_dev *); +#define pci_restore_state(pdev) _kc_pci_restore_state(pdev) +#endif /* !(RHEL_RELEASE_CODE >= RHEL 5.4) */ + +#ifdef HAVE_PCI_ERS +#undef free_netdev +extern void _kc_free_netdev(struct net_device *); +#define free_netdev(netdev) _kc_free_netdev(netdev) +#endif +static inline int pci_enable_pcie_error_reporting(struct pci_dev *dev) +{ + return 0; +} +#define pci_disable_pcie_error_reporting(dev) do {} while (0) +#define pci_cleanup_aer_uncorrect_error_status(dev) do {} while (0) + +extern void *_kc_kmemdup(const void *src, size_t len, unsigned gfp); +#define kmemdup(src, len, gfp) _kc_kmemdup(src, len, gfp) +#ifndef bool +#define bool _Bool +#define true 1 +#define false 0 +#endif +#else /* 2.6.19 */ +#include +#include +#endif /* < 2.6.19 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,20) ) +#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,5,28) ) +#undef INIT_WORK +#define INIT_WORK(_work, _func) \ +do { \ + INIT_LIST_HEAD(&(_work)->entry); \ + (_work)->pending = 0; \ + (_work)->func = (void (*)(void *))_func; \ + (_work)->data = _work; \ + init_timer(&(_work)->timer); \ +} while (0) +#endif + +#ifndef PCI_VDEVICE +#define PCI_VDEVICE(ven, dev) \ + PCI_VENDOR_ID_##ven, (dev), \ + PCI_ANY_ID, PCI_ANY_ID, 0, 0 +#endif + +#ifndef round_jiffies +#define round_jiffies(x) x +#endif + +#define csum_offset csum + +#define HAVE_EARLY_VMALLOC_NODE +#define dev_to_node(dev) -1 +#undef set_dev_node +/* remove compiler warning with b=b, for unused variable */ +#define set_dev_node(a, b) do { (b) = (b); } while(0) + +#if (!(RHEL_RELEASE_CODE && \ + (((RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(4,7)) && \ + (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0))) || \ + (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,6)))) && \ + !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(10,2,0))) +typedef __u16 __bitwise __sum16; +typedef __u32 __bitwise __wsum; +#endif + +#if (!(RHEL_RELEASE_CODE && \ + (((RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(4,7)) && \ + (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(5,0))) || \ + (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(5,4)))) && \ + !(SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(10,2,0))) +static inline __wsum csum_unfold(__sum16 n) +{ + return (__force __wsum)n; +} +#endif + +#else /* < 2.6.20 */ +#define HAVE_DEVICE_NUMA_NODE +#endif /* < 2.6.20 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) ) +#define to_net_dev(class) container_of(class, struct net_device, class_dev) +#define NETDEV_CLASS_DEV +#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,5))) +#define vlan_group_get_device(vg, id) (vg->vlan_devices[id]) +#define vlan_group_set_device(vg, id, dev) \ + do { \ + if (vg) vg->vlan_devices[id] = dev; \ + } while (0) +#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,5)) */ +#define pci_channel_offline(pdev) (pdev->error_state && \ + pdev->error_state != pci_channel_io_normal) +#define pci_request_selected_regions(pdev, bars, name) \ + pci_request_regions(pdev, name) +#define pci_release_selected_regions(pdev, bars) pci_release_regions(pdev); +#endif /* < 2.6.21 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) ) +#define tcp_hdr(skb) (skb->h.th) +#define tcp_hdrlen(skb) (skb->h.th->doff << 2) +#define skb_transport_offset(skb) (skb->h.raw - skb->data) +#define skb_transport_header(skb) (skb->h.raw) +#define ipv6_hdr(skb) (skb->nh.ipv6h) +#define ip_hdr(skb) (skb->nh.iph) +#define skb_network_offset(skb) (skb->nh.raw - skb->data) +#define skb_network_header(skb) (skb->nh.raw) +#define skb_tail_pointer(skb) skb->tail +#define skb_reset_tail_pointer(skb) \ + do { \ + skb->tail = skb->data; \ + } while (0) +#define skb_copy_to_linear_data(skb, from, len) \ + memcpy(skb->data, from, len) +#define skb_copy_to_linear_data_offset(skb, offset, from, len) \ + memcpy(skb->data + offset, from, len) +#define skb_network_header_len(skb) (skb->h.raw - skb->nh.raw) +#define pci_register_driver pci_module_init +#define skb_mac_header(skb) skb->mac.raw + +#ifdef NETIF_F_MULTI_QUEUE +#ifndef alloc_etherdev_mq +#define alloc_etherdev_mq(_a, _b) alloc_etherdev(_a) +#endif +#endif /* NETIF_F_MULTI_QUEUE */ + +#ifndef ETH_FCS_LEN +#define ETH_FCS_LEN 4 +#endif +#define cancel_work_sync(x) flush_scheduled_work() +#ifndef udp_hdr +#define udp_hdr _udp_hdr +static inline struct udphdr *_udp_hdr(const struct sk_buff *skb) +{ + return (struct udphdr *)skb_transport_header(skb); +} +#endif + +#ifdef cpu_to_be16 +#undef cpu_to_be16 +#endif +#define cpu_to_be16(x) __constant_htons(x) + +#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,1))) +enum { + DUMP_PREFIX_NONE, + DUMP_PREFIX_ADDRESS, + DUMP_PREFIX_OFFSET +}; +#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(5,1)) */ +#ifndef hex_asc +#define hex_asc(x) "0123456789abcdef"[x] +#endif +#include +extern void _kc_print_hex_dump(const char *level, const char *prefix_str, + int prefix_type, int rowsize, int groupsize, + const void *buf, size_t len, bool ascii); +#define print_hex_dump(lvl, s, t, r, g, b, l, a) \ + _kc_print_hex_dump(lvl, s, t, r, g, b, l, a) +#else /* 2.6.22 */ +#define ETH_TYPE_TRANS_SETS_DEV +#define HAVE_NETDEV_STATS_IN_NETDEV +#endif /* < 2.6.22 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,22) ) +#endif /* > 2.6.22 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23) ) +#define netif_subqueue_stopped(_a, _b) 0 +#ifndef PTR_ALIGN +#define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a))) +#endif + +#ifndef CONFIG_PM_SLEEP +#define CONFIG_PM_SLEEP CONFIG_PM +#endif + +#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,13) ) +#define HAVE_ETHTOOL_GET_PERM_ADDR +#endif /* 2.6.14 through 2.6.22 */ +#endif /* < 2.6.23 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) ) +#ifndef ETH_FLAG_LRO +#define ETH_FLAG_LRO NETIF_F_LRO +#endif + +/* if GRO is supported then the napi struct must already exist */ +#ifndef NETIF_F_GRO +/* NAPI API changes in 2.6.24 break everything */ +struct napi_struct { + /* used to look up the real NAPI polling routine */ + int (*poll)(struct napi_struct *, int); + struct net_device *dev; + int weight; +}; +#endif + +#ifdef NAPI +extern int __kc_adapter_clean(struct net_device *, int *); +extern struct net_device *napi_to_poll_dev(struct napi_struct *napi); +#define netif_napi_add(_netdev, _napi, _poll, _weight) \ + do { \ + struct napi_struct *__napi = (_napi); \ + struct net_device *poll_dev = napi_to_poll_dev(__napi); \ + poll_dev->poll = &(__kc_adapter_clean); \ + poll_dev->priv = (_napi); \ + poll_dev->weight = (_weight); \ + set_bit(__LINK_STATE_RX_SCHED, &poll_dev->state); \ + set_bit(__LINK_STATE_START, &poll_dev->state);\ + dev_hold(poll_dev); \ + __napi->poll = &(_poll); \ + __napi->weight = (_weight); \ + __napi->dev = (_netdev); \ + } while (0) +#define netif_napi_del(_napi) \ + do { \ + struct net_device *poll_dev = napi_to_poll_dev(_napi); \ + WARN_ON(!test_bit(__LINK_STATE_RX_SCHED, &poll_dev->state)); \ + dev_put(poll_dev); \ + memset(poll_dev, 0, sizeof(struct net_device));\ + } while (0) +#define napi_schedule_prep(_napi) \ + (netif_running((_napi)->dev) && netif_rx_schedule_prep(napi_to_poll_dev(_napi))) +#define napi_schedule(_napi) \ + do { \ + if (napi_schedule_prep(_napi)) \ + __netif_rx_schedule(napi_to_poll_dev(_napi)); \ + } while (0) +#define napi_enable(_napi) netif_poll_enable(napi_to_poll_dev(_napi)) +#define napi_disable(_napi) netif_poll_disable(napi_to_poll_dev(_napi)) +#define __napi_schedule(_napi) __netif_rx_schedule(napi_to_poll_dev(_napi)) +#ifndef NETIF_F_GRO +#define napi_complete(_napi) netif_rx_complete(napi_to_poll_dev(_napi)) +#else +#define napi_complete(_napi) \ + do { \ + napi_gro_flush(_napi); \ + netif_rx_complete(napi_to_poll_dev(_napi)); \ + } while (0) +#endif /* NETIF_F_GRO */ +#else /* NAPI */ +#define netif_napi_add(_netdev, _napi, _poll, _weight) \ + do { \ + struct napi_struct *__napi = _napi; \ + _netdev->poll = &(_poll); \ + _netdev->weight = (_weight); \ + __napi->poll = &(_poll); \ + __napi->weight = (_weight); \ + __napi->dev = (_netdev); \ + } while (0) +#define netif_napi_del(_a) do {} while (0) +#endif /* NAPI */ + +#undef dev_get_by_name +#define dev_get_by_name(_a, _b) dev_get_by_name(_b) +#define __netif_subqueue_stopped(_a, _b) netif_subqueue_stopped(_a, _b) +#ifndef DMA_BIT_MASK +#define DMA_BIT_MASK(n) (((n) == 64) ? DMA_64BIT_MASK : ((1ULL<<(n))-1)) +#endif + +#ifdef NETIF_F_TSO6 +#define skb_is_gso_v6 _kc_skb_is_gso_v6 +static inline int _kc_skb_is_gso_v6(const struct sk_buff *skb) +{ + return skb_shinfo(skb)->gso_type & SKB_GSO_TCPV6; +} +#endif /* NETIF_F_TSO6 */ + +#ifndef KERN_CONT +#define KERN_CONT "" +#endif +#else /* < 2.6.24 */ +#define HAVE_ETHTOOL_GET_SSET_COUNT +#define HAVE_NETDEV_NAPI_LIST +#endif /* < 2.6.24 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,24) ) +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,2,0) ) +#include +#else /* >= 3.2.0 */ +#include +#endif /* else >= 3.2.0 */ +#endif /* > 2.6.24 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) ) +#define PM_QOS_CPU_DMA_LATENCY 1 + +#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,6,18) ) +#include +#define PM_QOS_DEFAULT_VALUE INFINITE_LATENCY +#define pm_qos_add_requirement(pm_qos_class, name, value) \ + set_acceptable_latency(name, value) +#define pm_qos_remove_requirement(pm_qos_class, name) \ + remove_acceptable_latency(name) +#define pm_qos_update_requirement(pm_qos_class, name, value) \ + modify_acceptable_latency(name, value) +#else +#define PM_QOS_DEFAULT_VALUE -1 +#define pm_qos_add_requirement(pm_qos_class, name, value) +#define pm_qos_remove_requirement(pm_qos_class, name) +#define pm_qos_update_requirement(pm_qos_class, name, value) { \ + if (value != PM_QOS_DEFAULT_VALUE) { \ + printk(KERN_WARNING "%s: unable to set PM QoS requirement\n", \ + pci_name(adapter->pdev)); \ + } \ +} + +#endif /* > 2.6.18 */ + +#define pci_enable_device_mem(pdev) pci_enable_device(pdev) + +#ifndef DEFINE_PCI_DEVICE_TABLE +#define DEFINE_PCI_DEVICE_TABLE(_table) struct pci_device_id _table[] +#endif /* DEFINE_PCI_DEVICE_TABLE */ + +#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,0) ) +#ifndef IXGBE_PROCFS +#define IXGBE_PROCFS +#endif /* IXGBE_PROCFS */ +#endif /* >= 2.6.0 */ + + +#else /* < 2.6.25 */ + +#ifndef IXGBE_SYSFS +#define IXGBE_SYSFS +#endif /* IXGBE_SYSFS */ + + +#endif /* < 2.6.25 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26) ) +#ifndef clamp_t +#define clamp_t(type, val, min, max) ({ \ + type __val = (val); \ + type __min = (min); \ + type __max = (max); \ + __val = __val < __min ? __min : __val; \ + __val > __max ? __max : __val; }) +#endif /* clamp_t */ +#ifdef NETIF_F_TSO +#ifdef NETIF_F_TSO6 +#define netif_set_gso_max_size(_netdev, size) \ + do { \ + if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) { \ + _netdev->features &= ~NETIF_F_TSO; \ + _netdev->features &= ~NETIF_F_TSO6; \ + } else { \ + _netdev->features |= NETIF_F_TSO; \ + _netdev->features |= NETIF_F_TSO6; \ + } \ + } while (0) +#else /* NETIF_F_TSO6 */ +#define netif_set_gso_max_size(_netdev, size) \ + do { \ + if (adapter->flags & IXGBE_FLAG_DCB_ENABLED) \ + _netdev->features &= ~NETIF_F_TSO; \ + else \ + _netdev->features |= NETIF_F_TSO; \ + } while (0) +#endif /* NETIF_F_TSO6 */ +#else +#define netif_set_gso_max_size(_netdev, size) do {} while (0) +#endif /* NETIF_F_TSO */ +#undef kzalloc_node +#define kzalloc_node(_size, _flags, _node) kzalloc(_size, _flags) + +extern void _kc_pci_disable_link_state(struct pci_dev *dev, int state); +#define pci_disable_link_state(p, s) _kc_pci_disable_link_state(p, s) +#else /* < 2.6.26 */ +#include +#define HAVE_NETDEV_VLAN_FEATURES +#endif /* < 2.6.26 */ +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,27) ) +static inline void _kc_ethtool_cmd_speed_set(struct ethtool_cmd *ep, + __u32 speed) +{ + ep->speed = (__u16)speed; + /* ep->speed_hi = (__u16)(speed >> 16); */ +} +#define ethtool_cmd_speed_set _kc_ethtool_cmd_speed_set + +static inline __u32 _kc_ethtool_cmd_speed(struct ethtool_cmd *ep) +{ + /* no speed_hi before 2.6.27, and probably no need for it yet */ + return (__u32)ep->speed; +} +#define ethtool_cmd_speed _kc_ethtool_cmd_speed + +#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,15) ) +#if ((LINUX_VERSION_CODE < KERNEL_VERSION(2,6,23)) && defined(CONFIG_PM)) +#define ANCIENT_PM 1 +#elif ((LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,23)) && \ + (LINUX_VERSION_CODE < KERNEL_VERSION(2,6,26)) && \ + defined(CONFIG_PM_SLEEP)) +#define NEWER_PM 1 +#endif +#if defined(ANCIENT_PM) || defined(NEWER_PM) +#undef device_set_wakeup_enable +#define device_set_wakeup_enable(dev, val) \ + do { \ + u16 pmc = 0; \ + int pm = pci_find_capability(adapter->pdev, PCI_CAP_ID_PM); \ + if (pm) { \ + pci_read_config_word(adapter->pdev, pm + PCI_PM_PMC, \ + &pmc); \ + } \ + (dev)->power.can_wakeup = !!(pmc >> 11); \ + (dev)->power.should_wakeup = (val && (pmc >> 11)); \ + } while (0) +#endif /* 2.6.15-2.6.22 and CONFIG_PM or 2.6.23-2.6.25 and CONFIG_PM_SLEEP */ +#endif /* 2.6.15 through 2.6.27 */ +#ifndef netif_napi_del +#define netif_napi_del(_a) do {} while (0) +#ifdef NAPI +#ifdef CONFIG_NETPOLL +#undef netif_napi_del +#define netif_napi_del(_a) list_del(&(_a)->dev_list); +#endif +#endif +#endif /* netif_napi_del */ +#ifdef dma_mapping_error +#undef dma_mapping_error +#endif +#define dma_mapping_error(dev, dma_addr) pci_dma_mapping_error(dma_addr) + +#ifdef CONFIG_NETDEVICES_MULTIQUEUE +#define HAVE_TX_MQ +#endif + +#ifdef HAVE_TX_MQ +extern void _kc_netif_tx_stop_all_queues(struct net_device *); +extern void _kc_netif_tx_wake_all_queues(struct net_device *); +extern void _kc_netif_tx_start_all_queues(struct net_device *); +#define netif_tx_stop_all_queues(a) _kc_netif_tx_stop_all_queues(a) +#define netif_tx_wake_all_queues(a) _kc_netif_tx_wake_all_queues(a) +#define netif_tx_start_all_queues(a) _kc_netif_tx_start_all_queues(a) +#undef netif_stop_subqueue +#define netif_stop_subqueue(_ndev,_qi) do { \ + if (netif_is_multiqueue((_ndev))) \ + netif_stop_subqueue((_ndev), (_qi)); \ + else \ + netif_stop_queue((_ndev)); \ + } while (0) +#undef netif_start_subqueue +#define netif_start_subqueue(_ndev,_qi) do { \ + if (netif_is_multiqueue((_ndev))) \ + netif_start_subqueue((_ndev), (_qi)); \ + else \ + netif_start_queue((_ndev)); \ + } while (0) +#else /* HAVE_TX_MQ */ +#define netif_tx_stop_all_queues(a) netif_stop_queue(a) +#define netif_tx_wake_all_queues(a) netif_wake_queue(a) +#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,12) ) +#define netif_tx_start_all_queues(a) netif_start_queue(a) +#else +#define netif_tx_start_all_queues(a) do {} while (0) +#endif +#define netif_stop_subqueue(_ndev,_qi) netif_stop_queue((_ndev)) +#define netif_start_subqueue(_ndev,_qi) netif_start_queue((_ndev)) +#endif /* HAVE_TX_MQ */ +#ifndef NETIF_F_MULTI_QUEUE +#define NETIF_F_MULTI_QUEUE 0 +#define netif_is_multiqueue(a) 0 +#define netif_wake_subqueue(a, b) +#endif /* NETIF_F_MULTI_QUEUE */ + +#ifndef __WARN_printf +extern void __kc_warn_slowpath(const char *file, const int line, + const char *fmt, ...) __attribute__((format(printf, 3, 4))); +#define __WARN_printf(arg...) __kc_warn_slowpath(__FILE__, __LINE__, arg) +#endif /* __WARN_printf */ + +#ifndef WARN +#define WARN(condition, format...) ({ \ + int __ret_warn_on = !!(condition); \ + if (unlikely(__ret_warn_on)) \ + __WARN_printf(format); \ + unlikely(__ret_warn_on); \ +}) +#endif /* WARN */ +#else /* < 2.6.27 */ +#define HAVE_TX_MQ +#define HAVE_NETDEV_SELECT_QUEUE +#endif /* < 2.6.27 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,28) ) +#define pci_ioremap_bar(pdev, bar) ioremap(pci_resource_start(pdev, bar), \ + pci_resource_len(pdev, bar)) +#define pci_wake_from_d3 _kc_pci_wake_from_d3 +#define pci_prepare_to_sleep _kc_pci_prepare_to_sleep +extern int _kc_pci_wake_from_d3(struct pci_dev *dev, bool enable); +extern int _kc_pci_prepare_to_sleep(struct pci_dev *dev); +#define netdev_alloc_page(a) alloc_page(GFP_ATOMIC) +#ifndef __skb_queue_head_init +static inline void __kc_skb_queue_head_init(struct sk_buff_head *list) +{ + list->prev = list->next = (struct sk_buff *)list; + list->qlen = 0; +} +#define __skb_queue_head_init(_q) __kc_skb_queue_head_init(_q) +#endif +#endif /* < 2.6.28 */ + +#ifndef skb_add_rx_frag +#define skb_add_rx_frag _kc_skb_add_rx_frag +extern void _kc_skb_add_rx_frag(struct sk_buff *, int, struct page *, int, int); +#endif + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,29) ) +#ifndef swap +#define swap(a, b) \ + do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) +#endif +#define pci_request_selected_regions_exclusive(pdev, bars, name) \ + pci_request_selected_regions(pdev, bars, name) +#ifndef CONFIG_NR_CPUS +#define CONFIG_NR_CPUS 1 +#endif /* CONFIG_NR_CPUS */ +#ifndef pcie_aspm_enabled +#define pcie_aspm_enabled() (1) +#endif /* pcie_aspm_enabled */ +#else /* < 2.6.29 */ +#ifndef HAVE_NET_DEVICE_OPS +#define HAVE_NET_DEVICE_OPS +#endif +#ifdef CONFIG_DCB +#define HAVE_PFC_MODE_ENABLE +#endif /* CONFIG_DCB */ +#endif /* < 2.6.29 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,30) ) +#define skb_rx_queue_recorded(a) false +#define skb_get_rx_queue(a) 0 +#undef CONFIG_FCOE +#undef CONFIG_FCOE_MODULE +extern u16 _kc_skb_tx_hash(struct net_device *dev, struct sk_buff *skb); +#define skb_tx_hash(n, s) _kc_skb_tx_hash(n, s) +#define skb_record_rx_queue(a, b) do {} while (0) +#ifndef CONFIG_PCI_IOV +#undef pci_enable_sriov +#define pci_enable_sriov(a, b) -ENOTSUPP +#undef pci_disable_sriov +#define pci_disable_sriov(a) do {} while (0) +#endif /* CONFIG_PCI_IOV */ +#ifndef pr_cont +#define pr_cont(fmt, ...) \ + printk(KERN_CONT fmt, ##__VA_ARGS__) +#endif /* pr_cont */ +#else +#define HAVE_ASPM_QUIRKS +#endif /* < 2.6.30 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,31) ) +#define ETH_P_1588 0x88F7 +#define ETH_P_FIP 0x8914 +#ifndef netdev_uc_count +#define netdev_uc_count(dev) ((dev)->uc_count) +#endif +#ifndef netdev_for_each_uc_addr +#define netdev_for_each_uc_addr(uclist, dev) \ + for (uclist = dev->uc_list; uclist; uclist = uclist->next) +#endif +#else +#ifndef HAVE_NETDEV_STORAGE_ADDRESS +#define HAVE_NETDEV_STORAGE_ADDRESS +#endif +#ifndef HAVE_NETDEV_HW_ADDR +#define HAVE_NETDEV_HW_ADDR +#endif +#ifndef HAVE_TRANS_START_IN_QUEUE +#define HAVE_TRANS_START_IN_QUEUE +#endif +#endif /* < 2.6.31 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,32) ) +#undef netdev_tx_t +#define netdev_tx_t int +#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) +#ifndef NETIF_F_FCOE_MTU +#define NETIF_F_FCOE_MTU (1 << 26) +#endif +#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */ + +#ifndef pm_runtime_get_sync +#define pm_runtime_get_sync(dev) do {} while (0) +#endif +#ifndef pm_runtime_put +#define pm_runtime_put(dev) do {} while (0) +#endif +#ifndef pm_runtime_put_sync +#define pm_runtime_put_sync(dev) do {} while (0) +#endif +#ifndef pm_runtime_resume +#define pm_runtime_resume(dev) do {} while (0) +#endif +#ifndef pm_schedule_suspend +#define pm_schedule_suspend(dev, t) do {} while (0) +#endif +#ifndef pm_runtime_set_suspended +#define pm_runtime_set_suspended(dev) do {} while (0) +#endif +#ifndef pm_runtime_disable +#define pm_runtime_disable(dev) do {} while (0) +#endif +#ifndef pm_runtime_put_noidle +#define pm_runtime_put_noidle(dev) do {} while (0) +#endif +#ifndef pm_runtime_set_active +#define pm_runtime_set_active(dev) do {} while (0) +#endif +#ifndef pm_runtime_enable +#define pm_runtime_enable(dev) do {} while (0) +#endif +#ifndef pm_runtime_get_noresume +#define pm_runtime_get_noresume(dev) do {} while (0) +#endif +#else /* < 2.6.32 */ +#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) +#ifndef HAVE_NETDEV_OPS_FCOE_ENABLE +#define HAVE_NETDEV_OPS_FCOE_ENABLE +#endif +#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */ +#ifdef CONFIG_DCB +#ifndef HAVE_DCBNL_OPS_GETAPP +#define HAVE_DCBNL_OPS_GETAPP +#endif +#endif /* CONFIG_DCB */ +#include +/* IOV bad DMA target work arounds require at least this kernel rev support */ +#define HAVE_PCIE_TYPE +#endif /* < 2.6.32 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,33) ) +#ifndef pci_pcie_cap +#define pci_pcie_cap(pdev) pci_find_capability(pdev, PCI_CAP_ID_EXP) +#endif +#ifndef IPV4_FLOW +#define IPV4_FLOW 0x10 +#endif /* IPV4_FLOW */ +#ifndef IPV6_FLOW +#define IPV6_FLOW 0x11 +#endif /* IPV6_FLOW */ +/* Features back-ported to RHEL6 or SLES11 SP1 after 2.6.32 */ +#if ( (RHEL_RELEASE_CODE && RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,0)) || \ + (SLE_VERSION_CODE && SLE_VERSION_CODE >= SLE_VERSION(11,1,0)) ) +#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) +#ifndef HAVE_NETDEV_OPS_FCOE_GETWWN +#define HAVE_NETDEV_OPS_FCOE_GETWWN +#endif +#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */ +#endif /* RHEL6 or SLES11 SP1 */ +#ifndef __percpu +#define __percpu +#endif /* __percpu */ +#else /* < 2.6.33 */ +#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) +#ifndef HAVE_NETDEV_OPS_FCOE_GETWWN +#define HAVE_NETDEV_OPS_FCOE_GETWWN +#endif +#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */ +#define HAVE_ETHTOOL_SFP_DISPLAY_PORT +#endif /* < 2.6.33 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,34) ) +#ifndef ETH_FLAG_NTUPLE +#define ETH_FLAG_NTUPLE NETIF_F_NTUPLE +#endif + +#ifndef netdev_mc_count +#define netdev_mc_count(dev) ((dev)->mc_count) +#endif +#ifndef netdev_mc_empty +#define netdev_mc_empty(dev) (netdev_mc_count(dev) == 0) +#endif +#ifndef netdev_for_each_mc_addr +#define netdev_for_each_mc_addr(mclist, dev) \ + for (mclist = dev->mc_list; mclist; mclist = mclist->next) +#endif +#ifndef netdev_uc_count +#define netdev_uc_count(dev) ((dev)->uc.count) +#endif +#ifndef netdev_uc_empty +#define netdev_uc_empty(dev) (netdev_uc_count(dev) == 0) +#endif +#ifndef netdev_for_each_uc_addr +#define netdev_for_each_uc_addr(ha, dev) \ + list_for_each_entry(ha, &dev->uc.list, list) +#endif +#ifndef dma_set_coherent_mask +#define dma_set_coherent_mask(dev,mask) \ + pci_set_consistent_dma_mask(to_pci_dev(dev),(mask)) +#endif +#ifndef pci_dev_run_wake +#define pci_dev_run_wake(pdev) (0) +#endif + +/* netdev logging taken from include/linux/netdevice.h */ +#ifndef netdev_name +static inline const char *_kc_netdev_name(const struct net_device *dev) +{ + if (dev->reg_state != NETREG_REGISTERED) + return "(unregistered net_device)"; + return dev->name; +} +#define netdev_name(netdev) _kc_netdev_name(netdev) +#endif /* netdev_name */ + +#undef netdev_printk +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,0) ) +#define netdev_printk(level, netdev, format, args...) \ +do { \ + struct adapter_struct *kc_adapter = netdev_priv(netdev);\ + struct pci_dev *pdev = kc_adapter->pdev; \ + printk("%s %s: " format, level, pci_name(pdev), \ + ##args); \ +} while(0) +#elif ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,21) ) +#define netdev_printk(level, netdev, format, args...) \ +do { \ + struct adapter_struct *kc_adapter = netdev_priv(netdev);\ + struct pci_dev *pdev = kc_adapter->pdev; \ + struct device *dev = pci_dev_to_dev(pdev); \ + dev_printk(level, dev, "%s: " format, \ + netdev_name(netdev), ##args); \ +} while(0) +#else /* 2.6.21 => 2.6.34 */ +#define netdev_printk(level, netdev, format, args...) \ + dev_printk(level, (netdev)->dev.parent, \ + "%s: " format, \ + netdev_name(netdev), ##args) +#endif /* <2.6.0 <2.6.21 <2.6.34 */ +#undef netdev_emerg +#define netdev_emerg(dev, format, args...) \ + netdev_printk(KERN_EMERG, dev, format, ##args) +#undef netdev_alert +#define netdev_alert(dev, format, args...) \ + netdev_printk(KERN_ALERT, dev, format, ##args) +#undef netdev_crit +#define netdev_crit(dev, format, args...) \ + netdev_printk(KERN_CRIT, dev, format, ##args) +#undef netdev_err +#define netdev_err(dev, format, args...) \ + netdev_printk(KERN_ERR, dev, format, ##args) +#undef netdev_warn +#define netdev_warn(dev, format, args...) \ + netdev_printk(KERN_WARNING, dev, format, ##args) +#undef netdev_notice +#define netdev_notice(dev, format, args...) \ + netdev_printk(KERN_NOTICE, dev, format, ##args) +#undef netdev_info +#define netdev_info(dev, format, args...) \ + netdev_printk(KERN_INFO, dev, format, ##args) +#undef netdev_dbg +#if defined(DEBUG) +#define netdev_dbg(__dev, format, args...) \ + netdev_printk(KERN_DEBUG, __dev, format, ##args) +#elif defined(CONFIG_DYNAMIC_DEBUG) +#define netdev_dbg(__dev, format, args...) \ +do { \ + dynamic_dev_dbg((__dev)->dev.parent, "%s: " format, \ + netdev_name(__dev), ##args); \ +} while (0) +#else /* DEBUG */ +#define netdev_dbg(__dev, format, args...) \ +({ \ + if (0) \ + netdev_printk(KERN_DEBUG, __dev, format, ##args); \ + 0; \ +}) +#endif /* DEBUG */ + +#undef netif_printk +#define netif_printk(priv, type, level, dev, fmt, args...) \ +do { \ + if (netif_msg_##type(priv)) \ + netdev_printk(level, (dev), fmt, ##args); \ +} while (0) + +#undef netif_emerg +#define netif_emerg(priv, type, dev, fmt, args...) \ + netif_level(emerg, priv, type, dev, fmt, ##args) +#undef netif_alert +#define netif_alert(priv, type, dev, fmt, args...) \ + netif_level(alert, priv, type, dev, fmt, ##args) +#undef netif_crit +#define netif_crit(priv, type, dev, fmt, args...) \ + netif_level(crit, priv, type, dev, fmt, ##args) +#undef netif_err +#define netif_err(priv, type, dev, fmt, args...) \ + netif_level(err, priv, type, dev, fmt, ##args) +#undef netif_warn +#define netif_warn(priv, type, dev, fmt, args...) \ + netif_level(warn, priv, type, dev, fmt, ##args) +#undef netif_notice +#define netif_notice(priv, type, dev, fmt, args...) \ + netif_level(notice, priv, type, dev, fmt, ##args) +#undef netif_info +#define netif_info(priv, type, dev, fmt, args...) \ + netif_level(info, priv, type, dev, fmt, ##args) + +#ifdef SET_SYSTEM_SLEEP_PM_OPS +#define HAVE_SYSTEM_SLEEP_PM_OPS +#endif + +#ifndef for_each_set_bit +#define for_each_set_bit(bit, addr, size) \ + for ((bit) = find_first_bit((addr), (size)); \ + (bit) < (size); \ + (bit) = find_next_bit((addr), (size), (bit) + 1)) +#endif /* for_each_set_bit */ + +#ifndef DEFINE_DMA_UNMAP_ADDR +#define DEFINE_DMA_UNMAP_ADDR DECLARE_PCI_UNMAP_ADDR +#define DEFINE_DMA_UNMAP_LEN DECLARE_PCI_UNMAP_LEN +#define dma_unmap_addr pci_unmap_addr +#define dma_unmap_addr_set pci_unmap_addr_set +#define dma_unmap_len pci_unmap_len +#define dma_unmap_len_set pci_unmap_len_set +#endif /* DEFINE_DMA_UNMAP_ADDR */ +#else /* < 2.6.34 */ +#define HAVE_SYSTEM_SLEEP_PM_OPS +#ifndef HAVE_SET_RX_MODE +#define HAVE_SET_RX_MODE +#endif + +#endif /* < 2.6.34 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,35) ) +#ifndef numa_node_id +#define numa_node_id() 0 +#endif +#ifdef HAVE_TX_MQ +#include +#ifndef CONFIG_NETDEVICES_MULTIQUEUE +void _kc_netif_set_real_num_tx_queues(struct net_device *, unsigned int); +#define netif_set_real_num_tx_queues _kc_netif_set_real_num_tx_queues +#else /* CONFIG_NETDEVICES_MULTI_QUEUE */ +#define netif_set_real_num_tx_queues(_netdev, _count) \ + do { \ + (_netdev)->egress_subqueue_count = _count; \ + } while (0) +#endif /* CONFIG_NETDEVICES_MULTI_QUEUE */ +#else +#define netif_set_real_num_tx_queues(_netdev, _count) do {} while(0) +#endif /* HAVE_TX_MQ */ +#ifndef ETH_FLAG_RXHASH +#define ETH_FLAG_RXHASH (1<<28) +#endif /* ETH_FLAG_RXHASH */ +#else /* < 2.6.35 */ +#define HAVE_PM_QOS_REQUEST_LIST +#define HAVE_IRQ_AFFINITY_HINT +#endif /* < 2.6.35 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,36) ) +extern int _kc_ethtool_op_set_flags(struct net_device *, u32, u32); +#define ethtool_op_set_flags _kc_ethtool_op_set_flags +extern u32 _kc_ethtool_op_get_flags(struct net_device *); +#define ethtool_op_get_flags _kc_ethtool_op_get_flags + +#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS +#ifdef NET_IP_ALIGN +#undef NET_IP_ALIGN +#endif +#define NET_IP_ALIGN 0 +#endif /* CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS */ + +#ifdef NET_SKB_PAD +#undef NET_SKB_PAD +#endif + +#if (L1_CACHE_BYTES > 32) +#define NET_SKB_PAD L1_CACHE_BYTES +#else +#define NET_SKB_PAD 32 +#endif + +static inline struct sk_buff *_kc_netdev_alloc_skb_ip_align(struct net_device *dev, + unsigned int length) +{ + struct sk_buff *skb; + + skb = alloc_skb(length + NET_SKB_PAD + NET_IP_ALIGN, GFP_ATOMIC); + if (skb) { +#if (NET_IP_ALIGN + NET_SKB_PAD) + skb_reserve(skb, NET_IP_ALIGN + NET_SKB_PAD); +#endif + skb->dev = dev; + } + return skb; +} + +#ifdef netdev_alloc_skb_ip_align +#undef netdev_alloc_skb_ip_align +#endif +#define netdev_alloc_skb_ip_align(n, l) _kc_netdev_alloc_skb_ip_align(n, l) + +#undef netif_level +#define netif_level(level, priv, type, dev, fmt, args...) \ +do { \ + if (netif_msg_##type(priv)) \ + netdev_##level(dev, fmt, ##args); \ +} while (0) + +#undef usleep_range +#define usleep_range(min, max) msleep(DIV_ROUND_UP(min, 1000)) + +#else /* < 2.6.36 */ +#define HAVE_PM_QOS_REQUEST_ACTIVE +#define HAVE_8021P_SUPPORT +#define HAVE_NDO_GET_STATS64 +#endif /* < 2.6.36 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,37) ) +#ifndef ETHTOOL_RXNTUPLE_ACTION_CLEAR +#define ETHTOOL_RXNTUPLE_ACTION_CLEAR (-2) +#endif +#ifndef VLAN_N_VID +#define VLAN_N_VID VLAN_GROUP_ARRAY_LEN +#endif /* VLAN_N_VID */ +#ifndef ETH_FLAG_TXVLAN +#define ETH_FLAG_TXVLAN (1 << 7) +#endif /* ETH_FLAG_TXVLAN */ +#ifndef ETH_FLAG_RXVLAN +#define ETH_FLAG_RXVLAN (1 << 8) +#endif /* ETH_FLAG_RXVLAN */ + +static inline void _kc_skb_checksum_none_assert(struct sk_buff *skb) +{ + WARN_ON(skb->ip_summed != CHECKSUM_NONE); +} +#define skb_checksum_none_assert(skb) _kc_skb_checksum_none_assert(skb) + +static inline void *_kc_vzalloc_node(unsigned long size, int node) +{ + void *addr = vmalloc_node(size, node); + if (addr) + memset(addr, 0, size); + return addr; +} +#define vzalloc_node(_size, _node) _kc_vzalloc_node(_size, _node) + +static inline void *_kc_vzalloc(unsigned long size) +{ + void *addr = vmalloc(size); + if (addr) + memset(addr, 0, size); + return addr; +} +#define vzalloc(_size) _kc_vzalloc(_size) + +#ifndef vlan_get_protocol +static inline __be16 __kc_vlan_get_protocol(const struct sk_buff *skb) +{ + if (vlan_tx_tag_present(skb) || + skb->protocol != cpu_to_be16(ETH_P_8021Q)) + return skb->protocol; + + if (skb_headlen(skb) < sizeof(struct vlan_ethhdr)) + return 0; + + return ((struct vlan_ethhdr*)skb->data)->h_vlan_encapsulated_proto; +} +#define vlan_get_protocol(_skb) __kc_vlan_get_protocol(_skb) +#endif +#ifdef HAVE_HW_TIME_STAMP +#define SKBTX_HW_TSTAMP (1 << 0) +#define SKBTX_IN_PROGRESS (1 << 2) +#define SKB_SHARED_TX_IS_UNION +#endif +#if ( LINUX_VERSION_CODE > KERNEL_VERSION(2,4,18) ) +#ifndef HAVE_VLAN_RX_REGISTER +#define HAVE_VLAN_RX_REGISTER +#endif +#endif /* > 2.4.18 */ +#endif /* < 2.6.37 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,38) ) +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,22) ) +#define skb_checksum_start_offset(skb) skb_transport_offset(skb) +#else /* 2.6.22 -> 2.6.37 */ +static inline int _kc_skb_checksum_start_offset(const struct sk_buff *skb) +{ + return skb->csum_start - skb_headroom(skb); +} +#define skb_checksum_start_offset(skb) _kc_skb_checksum_start_offset(skb) +#endif /* 2.6.22 -> 2.6.37 */ +#ifdef CONFIG_DCB +#ifndef IEEE_8021QAZ_MAX_TCS +#define IEEE_8021QAZ_MAX_TCS 8 +#endif +#ifndef DCB_CAP_DCBX_HOST +#define DCB_CAP_DCBX_HOST 0x01 +#endif +#ifndef DCB_CAP_DCBX_LLD_MANAGED +#define DCB_CAP_DCBX_LLD_MANAGED 0x02 +#endif +#ifndef DCB_CAP_DCBX_VER_CEE +#define DCB_CAP_DCBX_VER_CEE 0x04 +#endif +#ifndef DCB_CAP_DCBX_VER_IEEE +#define DCB_CAP_DCBX_VER_IEEE 0x08 +#endif +#ifndef DCB_CAP_DCBX_STATIC +#define DCB_CAP_DCBX_STATIC 0x10 +#endif +#endif /* CONFIG_DCB */ +#else /* < 2.6.38 */ +#endif /* < 2.6.38 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,39) ) +#ifndef skb_queue_reverse_walk_safe +#define skb_queue_reverse_walk_safe(queue, skb, tmp) \ + for (skb = (queue)->prev, tmp = skb->prev; \ + skb != (struct sk_buff *)(queue); \ + skb = tmp, tmp = skb->prev) +#endif +#if (!(RHEL_RELEASE_CODE && RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(6,0))) +extern u8 _kc_netdev_get_num_tc(struct net_device *dev); +#define netdev_get_num_tc(dev) _kc_netdev_get_num_tc(dev) +extern u8 _kc_netdev_get_prio_tc_map(struct net_device *dev, u8 up); +#define netdev_get_prio_tc_map(dev, up) _kc_netdev_get_prio_tc_map(dev, up) +#define netdev_set_prio_tc_map(dev, up, tc) do {} while (0) +#else /* RHEL6.1 or greater */ +#ifndef HAVE_MQPRIO +#define HAVE_MQPRIO +#endif /* HAVE_MQPRIO */ +#ifdef CONFIG_DCB +#ifndef HAVE_DCBNL_IEEE +#define HAVE_DCBNL_IEEE +#ifndef IEEE_8021QAZ_TSA_STRICT +#define IEEE_8021QAZ_TSA_STRICT 0 +#endif +#ifndef IEEE_8021QAZ_TSA_ETS +#define IEEE_8021QAZ_TSA_ETS 2 +#endif +#ifndef IEEE_8021QAZ_APP_SEL_ETHERTYPE +#define IEEE_8021QAZ_APP_SEL_ETHERTYPE 1 +#endif +#endif +#endif /* CONFIG_DCB */ +#endif /* !(RHEL_RELEASE_CODE > RHEL_RELEASE_VERSION(6,0)) */ +#else /* < 2.6.39 */ +#if defined(CONFIG_FCOE) || defined(CONFIG_FCOE_MODULE) +#ifndef HAVE_NETDEV_OPS_FCOE_DDP_TARGET +#define HAVE_NETDEV_OPS_FCOE_DDP_TARGET +#endif +#endif /* CONFIG_FCOE || CONFIG_FCOE_MODULE */ +#ifndef HAVE_MQPRIO +#define HAVE_MQPRIO +#endif +#ifndef HAVE_SETUP_TC +#define HAVE_SETUP_TC +#endif +#ifdef CONFIG_DCB +#ifndef HAVE_DCBNL_IEEE +#define HAVE_DCBNL_IEEE +#endif +#endif /* CONFIG_DCB */ +#ifndef HAVE_NDO_SET_FEATURES +#define HAVE_NDO_SET_FEATURES +#endif +#endif /* < 2.6.39 */ + +/*****************************************************************************/ +/* use < 2.6.40 because of a Fedora 15 kernel update where they + * updated the kernel version to 2.6.40.x and they back-ported 3.0 features + * like set_phys_id for ethtool. + */ +#undef ETHTOOL_GRXRINGS +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(2,6,40) ) +#ifdef ETHTOOL_GRXRINGS +#ifndef FLOW_EXT +#define FLOW_EXT 0x80000000 +union _kc_ethtool_flow_union { + struct ethtool_tcpip4_spec tcp_ip4_spec; + struct ethtool_usrip4_spec usr_ip4_spec; + __u8 hdata[60]; +}; +struct _kc_ethtool_flow_ext { + __be16 vlan_etype; + __be16 vlan_tci; + __be32 data[2]; +}; +struct _kc_ethtool_rx_flow_spec { + __u32 flow_type; + union _kc_ethtool_flow_union h_u; + struct _kc_ethtool_flow_ext h_ext; + union _kc_ethtool_flow_union m_u; + struct _kc_ethtool_flow_ext m_ext; + __u64 ring_cookie; + __u32 location; +}; +#define ethtool_rx_flow_spec _kc_ethtool_rx_flow_spec +#endif /* FLOW_EXT */ +#endif + +#define pci_disable_link_state_locked pci_disable_link_state + +#ifndef PCI_LTR_VALUE_MASK +#define PCI_LTR_VALUE_MASK 0x000003ff +#endif +#ifndef PCI_LTR_SCALE_MASK +#define PCI_LTR_SCALE_MASK 0x00001c00 +#endif +#ifndef PCI_LTR_SCALE_SHIFT +#define PCI_LTR_SCALE_SHIFT 10 +#endif + +#else /* < 2.6.40 */ +#define HAVE_ETHTOOL_SET_PHYS_ID +#endif /* < 2.6.40 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,1,0) ) +#ifndef __netdev_alloc_skb_ip_align +#define __netdev_alloc_skb_ip_align(d,l,_g) netdev_alloc_skb_ip_align(d,l) +#endif /* __netdev_alloc_skb_ip_align */ +#define dcb_ieee_setapp(dev, app) dcb_setapp(dev, app) +#define dcb_ieee_delapp(dev, app) 0 +#define dcb_ieee_getapp_mask(dev, app) (1 << app->priority) +#else /* < 3.1.0 */ +#ifndef HAVE_DCBNL_IEEE_DELAPP +#define HAVE_DCBNL_IEEE_DELAPP +#endif +#endif /* < 3.1.0 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,2,0) ) +#ifdef ETHTOOL_GRXRINGS +#define HAVE_ETHTOOL_GET_RXNFC_VOID_RULE_LOCS +#endif /* ETHTOOL_GRXRINGS */ + +#ifndef skb_frag_size +#define skb_frag_size(frag) _kc_skb_frag_size(frag) +static inline unsigned int _kc_skb_frag_size(const skb_frag_t *frag) +{ + return frag->size; +} +#endif /* skb_frag_size */ + +#ifndef skb_frag_size_sub +#define skb_frag_size_sub(frag, delta) _kc_skb_frag_size_sub(frag, delta) +static inline void _kc_skb_frag_size_sub(skb_frag_t *frag, int delta) +{ + frag->size -= delta; +} +#endif /* skb_frag_size_sub */ + +#ifndef skb_frag_page +#define skb_frag_page(frag) _kc_skb_frag_page(frag) +static inline struct page *_kc_skb_frag_page(const skb_frag_t *frag) +{ + return frag->page; +} +#endif /* skb_frag_page */ + +#ifndef skb_frag_address +#define skb_frag_address(frag) _kc_skb_frag_address(frag) +static inline void *_kc_skb_frag_address(const skb_frag_t *frag) +{ + return page_address(skb_frag_page(frag)) + frag->page_offset; +} +#endif /* skb_frag_address */ + +#ifndef skb_frag_dma_map +#define skb_frag_dma_map(dev,frag,offset,size,dir) \ + _kc_skb_frag_dma_map(dev,frag,offset,size,dir) +static inline dma_addr_t _kc_skb_frag_dma_map(struct device *dev, + const skb_frag_t *frag, + size_t offset, size_t size, + enum dma_data_direction dir) +{ + return dma_map_page(dev, skb_frag_page(frag), + frag->page_offset + offset, size, dir); +} +#endif /* skb_frag_dma_map */ + +#ifndef __skb_frag_unref +#define __skb_frag_unref(frag) __kc_skb_frag_unref(frag) +static inline void __kc_skb_frag_unref(skb_frag_t *frag) +{ + put_page(skb_frag_page(frag)); +} +#endif /* __skb_frag_unref */ +#else /* < 3.2.0 */ +#ifndef HAVE_PCI_DEV_FLAGS_ASSIGNED +#define HAVE_PCI_DEV_FLAGS_ASSIGNED +#define HAVE_VF_SPOOFCHK_CONFIGURE +#endif +#endif /* < 3.2.0 */ + +#if (RHEL_RELEASE_CODE && \ + (RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6,2)) && \ + (RHEL_RELEASE_CODE < RHEL_RELEASE_VERSION(7,0))) +#undef ixgbe_get_netdev_tc_txq +#define ixgbe_get_netdev_tc_txq(dev, tc) (&netdev_extended(dev)->qos_data.tc_to_txq[tc]) +#endif + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,3,0) ) +typedef u32 kni_netdev_features_t; +#else /* ! < 3.3.0 */ +typedef netdev_features_t kni_netdev_features_t; +#define HAVE_INT_NDO_VLAN_RX_ADD_VID +#ifdef ETHTOOL_SRXNTUPLE +#undef ETHTOOL_SRXNTUPLE +#endif +#endif /* < 3.3.0 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,4,0) ) +#ifndef NETIF_F_RXFCS +#define NETIF_F_RXFCS 0 +#endif /* NETIF_F_RXFCS */ +#ifndef NETIF_F_RXALL +#define NETIF_F_RXALL 0 +#endif /* NETIF_F_RXALL */ + +#define NUMTCS_RETURNS_U8 + + +#endif /* < 3.4.0 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,5,0) ) +static inline bool __kc_ether_addr_equal(const u8 *addr1, const u8 *addr2) +{ + return !compare_ether_addr(addr1, addr2); +} +#define ether_addr_equal(_addr1, _addr2) __kc_ether_addr_equal((_addr1),(_addr2)) +#else +#define HAVE_FDB_OPS +#endif /* < 3.5.0 */ + +/*****************************************************************************/ +#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(3,10,0) ) +#define NETIF_F_HW_VLAN_TX NETIF_F_HW_VLAN_CTAG_TX +#define NETIF_F_HW_VLAN_RX NETIF_F_HW_VLAN_CTAG_RX +#define NETIF_F_HW_VLAN_FILTER NETIF_F_HW_VLAN_CTAG_FILTER +#endif /* >= 3.10.0 */ + +#if ( LINUX_VERSION_CODE < KERNEL_VERSION(3,10,0) ) +#ifdef CONFIG_PCI_IOV +extern int __kc_pci_vfs_assigned(struct pci_dev *dev); +#else +static inline int __kc_pci_vfs_assigned(struct pci_dev *dev) +{ + return 0; +} +#endif +#define pci_vfs_assigned(dev) __kc_pci_vfs_assigned(dev) + +#endif + +#if ( LINUX_VERSION_CODE >= KERNEL_VERSION(3,16,0) ) +#define SET_ETHTOOL_OPS(netdev, ops) ((netdev)->ethtool_ops = (ops)) +#endif /* >= 3.16.0 */ + +#endif /* _KCOMPAT_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/kni_dev.h b/lib/librte_eal/linuxapp/kni/kni_dev.h new file mode 100644 index 00000000..a0e5cb6b --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/kni_dev.h @@ -0,0 +1,149 @@ +/*- + * GPL LICENSE SUMMARY + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * The full GNU General Public License is included in this distribution + * in the file called LICENSE.GPL. + * + * Contact Information: + * Intel Corporation + */ + +#ifndef _KNI_DEV_H_ +#define _KNI_DEV_H_ + +#include +#include +#include +#include +#include +#include + +#ifdef RTE_KNI_VHOST +#include +#endif + +#include +#define KNI_KTHREAD_RESCHEDULE_INTERVAL 5 /* us */ + +/** + * A structure describing the private information for a kni device. + */ + +struct kni_dev { + /* kni list */ + struct list_head list; + + struct net_device_stats stats; + int status; + uint16_t group_id; /* Group ID of a group of KNI devices */ + unsigned core_id; /* Core ID to bind */ + char name[RTE_KNI_NAMESIZE]; /* Network device name */ + struct task_struct *pthread; + + /* wait queue for req/resp */ + wait_queue_head_t wq; + struct mutex sync_lock; + + /* PCI device id */ + uint16_t device_id; + + /* kni device */ + struct net_device *net_dev; + struct net_device *lad_dev; + struct pci_dev *pci_dev; + + /* queue for packets to be sent out */ + void *tx_q; + + /* queue for the packets received */ + void *rx_q; + + /* queue for the allocated mbufs those can be used to save sk buffs */ + void *alloc_q; + + /* free queue for the mbufs to be freed */ + void *free_q; + + /* request queue */ + void *req_q; + + /* response queue */ + void *resp_q; + + void * sync_kva; + void *sync_va; + + void *mbuf_kva; + void *mbuf_va; + + /* mbuf size */ + unsigned mbuf_size; + + /* synchro for request processing */ + unsigned long synchro; + +#ifdef RTE_KNI_VHOST + struct kni_vhost_queue* vhost_queue; + volatile enum { + BE_STOP = 0x1, + BE_START = 0x2, + BE_FINISH = 0x4, + }vq_status; +#endif +}; + +#define KNI_ERR(args...) printk(KERN_DEBUG "KNI: Error: " args) +#define KNI_PRINT(args...) printk(KERN_DEBUG "KNI: " args) +#ifdef RTE_KNI_KO_DEBUG + #define KNI_DBG(args...) printk(KERN_DEBUG "KNI: " args) +#else + #define KNI_DBG(args...) +#endif + +#ifdef RTE_KNI_VHOST +unsigned int +kni_poll(struct file *file, struct socket *sock, poll_table * wait); +int kni_chk_vhost_rx(struct kni_dev *kni); +int kni_vhost_init(struct kni_dev *kni); +int kni_vhost_backend_release(struct kni_dev *kni); + +struct kni_vhost_queue { + struct sock sk; + struct socket *sock; + int vnet_hdr_sz; + struct kni_dev *kni; + int sockfd; + unsigned int flags; + struct sk_buff* cache; + struct rte_kni_fifo* fifo; +}; + +#endif + +#ifdef RTE_KNI_VHOST_DEBUG_RX + #define KNI_DBG_RX(args...) printk(KERN_DEBUG "KNI RX: " args) +#else + #define KNI_DBG_RX(args...) +#endif + +#ifdef RTE_KNI_VHOST_DEBUG_TX + #define KNI_DBG_TX(args...) printk(KERN_DEBUG "KNI TX: " args) +#else + #define KNI_DBG_TX(args...) +#endif + +#endif diff --git a/lib/librte_eal/linuxapp/kni/kni_ethtool.c b/lib/librte_eal/linuxapp/kni/kni_ethtool.c new file mode 100644 index 00000000..06b6d463 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/kni_ethtool.c @@ -0,0 +1,217 @@ +/*- + * GPL LICENSE SUMMARY + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * The full GNU General Public License is included in this distribution + * in the file called LICENSE.GPL. + * + * Contact Information: + * Intel Corporation + */ + +#include +#include +#include +#include "kni_dev.h" + +static int +kni_check_if_running(struct net_device *dev) +{ + struct kni_dev *priv = netdev_priv(dev); + if (priv->lad_dev) + return 0; + else + return -EOPNOTSUPP; +} + +static void +kni_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) +{ + struct kni_dev *priv = netdev_priv(dev); + priv->lad_dev->ethtool_ops->get_drvinfo(priv->lad_dev, info); +} + +static int +kni_get_settings(struct net_device *dev, struct ethtool_cmd *ecmd) +{ + struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->get_settings(priv->lad_dev, ecmd); +} + +static int +kni_set_settings(struct net_device *dev, struct ethtool_cmd *ecmd) +{ + struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->set_settings(priv->lad_dev, ecmd); +} + +static void +kni_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol) +{ + struct kni_dev *priv = netdev_priv(dev); + priv->lad_dev->ethtool_ops->get_wol(priv->lad_dev, wol); +} + +static int +kni_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol) +{ + struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->set_wol(priv->lad_dev, wol); +} + +static int +kni_nway_reset(struct net_device *dev) +{ + struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->nway_reset(priv->lad_dev); +} + +static int +kni_get_eeprom_len(struct net_device *dev) +{ + struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->get_eeprom_len(priv->lad_dev); +} + +static int +kni_get_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom, + u8 *bytes) +{ + struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->get_eeprom(priv->lad_dev, eeprom, + bytes); +} + +static int +kni_set_eeprom(struct net_device *dev, struct ethtool_eeprom *eeprom, + u8 *bytes) +{ + struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->set_eeprom(priv->lad_dev, eeprom, + bytes); +} + +static void +kni_get_ringparam(struct net_device *dev, struct ethtool_ringparam *ring) +{ + struct kni_dev *priv = netdev_priv(dev); + priv->lad_dev->ethtool_ops->get_ringparam(priv->lad_dev, ring); +} + +static int +kni_set_ringparam(struct net_device *dev, struct ethtool_ringparam *ring) +{ + struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->set_ringparam(priv->lad_dev, ring); +} + +static void +kni_get_pauseparam(struct net_device *dev, struct ethtool_pauseparam *pause) +{ + struct kni_dev *priv = netdev_priv(dev); + priv->lad_dev->ethtool_ops->get_pauseparam(priv->lad_dev, pause); +} + +static int +kni_set_pauseparam(struct net_device *dev, struct ethtool_pauseparam *pause) +{ + struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->set_pauseparam(priv->lad_dev, + pause); +} + +static u32 +kni_get_msglevel(struct net_device *dev) +{ + struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->get_msglevel(priv->lad_dev); +} + +static void +kni_set_msglevel(struct net_device *dev, u32 data) +{ + struct kni_dev *priv = netdev_priv(dev); + priv->lad_dev->ethtool_ops->set_msglevel(priv->lad_dev, data); +} + +static int +kni_get_regs_len(struct net_device *dev) +{ + struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->get_regs_len(priv->lad_dev); +} + +static void +kni_get_regs(struct net_device *dev, struct ethtool_regs *regs, void *p) +{ + struct kni_dev *priv = netdev_priv(dev); + priv->lad_dev->ethtool_ops->get_regs(priv->lad_dev, regs, p); +} + +static void +kni_get_strings(struct net_device *dev, u32 stringset, u8 *data) +{ + struct kni_dev *priv = netdev_priv(dev); + priv->lad_dev->ethtool_ops->get_strings(priv->lad_dev, stringset, + data); +} + +static int +kni_get_sset_count(struct net_device *dev, int sset) +{ + struct kni_dev *priv = netdev_priv(dev); + return priv->lad_dev->ethtool_ops->get_sset_count(priv->lad_dev, sset); +} + +static void +kni_get_ethtool_stats(struct net_device *dev, struct ethtool_stats *stats, + u64 *data) +{ + struct kni_dev *priv = netdev_priv(dev); + priv->lad_dev->ethtool_ops->get_ethtool_stats(priv->lad_dev, stats, + data); +} + +struct ethtool_ops kni_ethtool_ops = { + .begin = kni_check_if_running, + .get_drvinfo = kni_get_drvinfo, + .get_settings = kni_get_settings, + .set_settings = kni_set_settings, + .get_regs_len = kni_get_regs_len, + .get_regs = kni_get_regs, + .get_wol = kni_get_wol, + .set_wol = kni_set_wol, + .nway_reset = kni_nway_reset, + .get_link = ethtool_op_get_link, + .get_eeprom_len = kni_get_eeprom_len, + .get_eeprom = kni_get_eeprom, + .set_eeprom = kni_set_eeprom, + .get_ringparam = kni_get_ringparam, + .set_ringparam = kni_set_ringparam, + .get_pauseparam = kni_get_pauseparam, + .set_pauseparam = kni_set_pauseparam, + .get_msglevel = kni_get_msglevel, + .set_msglevel = kni_set_msglevel, + .get_strings = kni_get_strings, + .get_sset_count = kni_get_sset_count, + .get_ethtool_stats = kni_get_ethtool_stats, +}; + +void +kni_set_ethtool_ops(struct net_device *netdev) +{ + netdev->ethtool_ops = &kni_ethtool_ops; +} diff --git a/lib/librte_eal/linuxapp/kni/kni_fifo.h b/lib/librte_eal/linuxapp/kni/kni_fifo.h new file mode 100644 index 00000000..3ea750e2 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/kni_fifo.h @@ -0,0 +1,108 @@ +/*- + * GPL LICENSE SUMMARY + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * The full GNU General Public License is included in this distribution + * in the file called LICENSE.GPL. + * + * Contact Information: + * Intel Corporation + */ + +#ifndef _KNI_FIFO_H_ +#define _KNI_FIFO_H_ + +#include + +/** + * Adds num elements into the fifo. Return the number actually written + */ +static inline unsigned +kni_fifo_put(struct rte_kni_fifo *fifo, void **data, unsigned num) +{ + unsigned i = 0; + unsigned fifo_write = fifo->write; + unsigned fifo_read = fifo->read; + unsigned new_write = fifo_write; + + for (i = 0; i < num; i++) { + new_write = (new_write + 1) & (fifo->len - 1); + + if (new_write == fifo_read) + break; + fifo->buffer[fifo_write] = data[i]; + fifo_write = new_write; + } + fifo->write = fifo_write; + + return i; +} + +/** + * Get up to num elements from the fifo. Return the number actully read + */ +static inline unsigned +kni_fifo_get(struct rte_kni_fifo *fifo, void **data, unsigned num) +{ + unsigned i = 0; + unsigned new_read = fifo->read; + unsigned fifo_write = fifo->write; + + for (i = 0; i < num; i++) { + if (new_read == fifo_write) + break; + + data[i] = fifo->buffer[new_read]; + new_read = (new_read + 1) & (fifo->len - 1); + } + fifo->read = new_read; + + return i; +} + +/** + * Get the num of elements in the fifo + */ +static inline unsigned +kni_fifo_count(struct rte_kni_fifo *fifo) +{ + return (fifo->len + fifo->write - fifo->read) & ( fifo->len - 1); +} + +/** + * Get the num of available elements in the fifo + */ +static inline unsigned +kni_fifo_free_count(struct rte_kni_fifo *fifo) +{ + return (fifo->read - fifo->write - 1) & (fifo->len - 1); +} + +#ifdef RTE_KNI_VHOST +/** + * Initializes the kni fifo structure + */ +static inline void +kni_fifo_init(struct rte_kni_fifo *fifo, unsigned size) +{ + fifo->write = 0; + fifo->read = 0; + fifo->len = size; + fifo->elem_size = sizeof(void *); +} +#endif + +#endif /* _KNI_FIFO_H_ */ diff --git a/lib/librte_eal/linuxapp/kni/kni_misc.c b/lib/librte_eal/linuxapp/kni/kni_misc.c new file mode 100644 index 00000000..ae8133f3 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/kni_misc.c @@ -0,0 +1,688 @@ +/*- + * GPL LICENSE SUMMARY + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * The full GNU General Public License is included in this distribution + * in the file called LICENSE.GPL. + * + * Contact Information: + * Intel Corporation + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "kni_dev.h" + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Kernel Module for managing kni devices"); + +#define KNI_RX_LOOP_NUM 1000 + +#define KNI_MAX_DEVICES 32 + +extern void kni_net_rx(struct kni_dev *kni); +extern void kni_net_init(struct net_device *dev); +extern void kni_net_config_lo_mode(char *lo_str); +extern void kni_net_poll_resp(struct kni_dev *kni); +extern void kni_set_ethtool_ops(struct net_device *netdev); + +extern int ixgbe_kni_probe(struct pci_dev *pdev, struct net_device **lad_dev); +extern void ixgbe_kni_remove(struct pci_dev *pdev); +extern int igb_kni_probe(struct pci_dev *pdev, struct net_device **lad_dev); +extern void igb_kni_remove(struct pci_dev *pdev); + +static int kni_open(struct inode *inode, struct file *file); +static int kni_release(struct inode *inode, struct file *file); +static int kni_ioctl(struct inode *inode, unsigned int ioctl_num, + unsigned long ioctl_param); +static int kni_compat_ioctl(struct inode *inode, unsigned int ioctl_num, + unsigned long ioctl_param); +static int kni_dev_remove(struct kni_dev *dev); + +static int __init kni_parse_kthread_mode(void); + +/* KNI processing for single kernel thread mode */ +static int kni_thread_single(void *unused); +/* KNI processing for multiple kernel thread mode */ +static int kni_thread_multiple(void *param); + +static struct file_operations kni_fops = { + .owner = THIS_MODULE, + .open = kni_open, + .release = kni_release, + .unlocked_ioctl = (void *)kni_ioctl, + .compat_ioctl = (void *)kni_compat_ioctl, +}; + +static struct miscdevice kni_misc = { + .minor = MISC_DYNAMIC_MINOR, + .name = KNI_DEVICE, + .fops = &kni_fops, +}; + +/* loopback mode */ +static char *lo_mode = NULL; + +/* Kernel thread mode */ +static char *kthread_mode = NULL; +static unsigned multiple_kthread_on = 0; + +#define KNI_DEV_IN_USE_BIT_NUM 0 /* Bit number for device in use */ + +static int kni_net_id; + +struct kni_net { + unsigned long device_in_use; /* device in use flag */ + struct task_struct *kni_kthread; + struct rw_semaphore kni_list_lock; + struct list_head kni_list_head; +}; + +static int __net_init kni_init_net(struct net *net) +{ +#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) + struct kni_net *knet = net_generic(net, kni_net_id); +#else + struct kni_net *knet; + int ret; + + knet = kmalloc(sizeof(struct kni_net), GFP_KERNEL); + if (!knet) { + ret = -ENOMEM; + return ret; + } +#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */ + + /* Clear the bit of device in use */ + clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use); + + init_rwsem(&knet->kni_list_lock); + INIT_LIST_HEAD(&knet->kni_list_head); + +#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) + return 0; +#else + ret = net_assign_generic(net, kni_net_id, knet); + if (ret < 0) + kfree(knet); + + return ret; +#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */ +} + +static void __net_exit kni_exit_net(struct net *net) +{ +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2, 6, 32) + struct kni_net *knet = net_generic(net, kni_net_id); + + kfree(knet); +#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */ +} + +static struct pernet_operations kni_net_ops = { + .init = kni_init_net, + .exit = kni_exit_net, +#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) + .id = &kni_net_id, + .size = sizeof(struct kni_net), +#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */ +}; + +static int __init +kni_init(void) +{ + int rc; + + KNI_PRINT("######## DPDK kni module loading ########\n"); + + if (kni_parse_kthread_mode() < 0) { + KNI_ERR("Invalid parameter for kthread_mode\n"); + return -EINVAL; + } + +#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) + rc = register_pernet_subsys(&kni_net_ops); +#else + rc = register_pernet_gen_subsys(&kni_net_id, &kni_net_ops); +#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */ + if (rc) + return -EPERM; + + rc = misc_register(&kni_misc); + if (rc != 0) { + KNI_ERR("Misc registration failed\n"); + goto out; + } + + /* Configure the lo mode according to the input parameter */ + kni_net_config_lo_mode(lo_mode); + + KNI_PRINT("######## DPDK kni module loaded ########\n"); + + return 0; + +out: +#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) + unregister_pernet_subsys(&kni_net_ops); +#else + register_pernet_gen_subsys(&kni_net_id, &kni_net_ops); +#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */ + return rc; +} + +static void __exit +kni_exit(void) +{ + misc_deregister(&kni_misc); +#if LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) + unregister_pernet_subsys(&kni_net_ops); +#else + register_pernet_gen_subsys(&kni_net_id, &kni_net_ops); +#endif /* LINUX_VERSION_CODE > KERNEL_VERSION(2, 6, 32) */ + KNI_PRINT("####### DPDK kni module unloaded #######\n"); +} + +static int __init +kni_parse_kthread_mode(void) +{ + if (!kthread_mode) + return 0; + + if (strcmp(kthread_mode, "single") == 0) + return 0; + else if (strcmp(kthread_mode, "multiple") == 0) + multiple_kthread_on = 1; + else + return -1; + + return 0; +} + +static int +kni_open(struct inode *inode, struct file *file) +{ + struct net *net = current->nsproxy->net_ns; + struct kni_net *knet = net_generic(net, kni_net_id); + + /* kni device can be opened by one user only per netns */ + if (test_and_set_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use)) + return -EBUSY; + + /* Create kernel thread for single mode */ + if (multiple_kthread_on == 0) { + KNI_PRINT("Single kernel thread for all KNI devices\n"); + /* Create kernel thread for RX */ + knet->kni_kthread = kthread_run(kni_thread_single, (void *)knet, + "kni_single"); + if (IS_ERR(knet->kni_kthread)) { + KNI_ERR("Unable to create kernel threaed\n"); + return PTR_ERR(knet->kni_kthread); + } + } else + KNI_PRINT("Multiple kernel thread mode enabled\n"); + + file->private_data = get_net(net); + KNI_PRINT("/dev/kni opened\n"); + + return 0; +} + +static int +kni_release(struct inode *inode, struct file *file) +{ + struct net *net = file->private_data; + struct kni_net *knet = net_generic(net, kni_net_id); + struct kni_dev *dev, *n; + + /* Stop kernel thread for single mode */ + if (multiple_kthread_on == 0) { + /* Stop kernel thread */ + kthread_stop(knet->kni_kthread); + knet->kni_kthread = NULL; + } + + down_write(&knet->kni_list_lock); + list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) { + /* Stop kernel thread for multiple mode */ + if (multiple_kthread_on && dev->pthread != NULL) { + kthread_stop(dev->pthread); + dev->pthread = NULL; + } + +#ifdef RTE_KNI_VHOST + kni_vhost_backend_release(dev); +#endif + kni_dev_remove(dev); + list_del(&dev->list); + } + up_write(&knet->kni_list_lock); + + /* Clear the bit of device in use */ + clear_bit(KNI_DEV_IN_USE_BIT_NUM, &knet->device_in_use); + + put_net(net); + KNI_PRINT("/dev/kni closed\n"); + + return 0; +} + +static int +kni_thread_single(void *data) +{ + struct kni_net *knet = data; + int j; + struct kni_dev *dev; + + while (!kthread_should_stop()) { + down_read(&knet->kni_list_lock); + for (j = 0; j < KNI_RX_LOOP_NUM; j++) { + list_for_each_entry(dev, &knet->kni_list_head, list) { +#ifdef RTE_KNI_VHOST + kni_chk_vhost_rx(dev); +#else + kni_net_rx(dev); +#endif + kni_net_poll_resp(dev); + } + } + up_read(&knet->kni_list_lock); +#ifdef RTE_KNI_PREEMPT_DEFAULT + /* reschedule out for a while */ + schedule_timeout_interruptible(usecs_to_jiffies( \ + KNI_KTHREAD_RESCHEDULE_INTERVAL)); +#endif + } + + return 0; +} + +static int +kni_thread_multiple(void *param) +{ + int j; + struct kni_dev *dev = (struct kni_dev *)param; + + while (!kthread_should_stop()) { + for (j = 0; j < KNI_RX_LOOP_NUM; j++) { +#ifdef RTE_KNI_VHOST + kni_chk_vhost_rx(dev); +#else + kni_net_rx(dev); +#endif + kni_net_poll_resp(dev); + } +#ifdef RTE_KNI_PREEMPT_DEFAULT + schedule_timeout_interruptible(usecs_to_jiffies( \ + KNI_KTHREAD_RESCHEDULE_INTERVAL)); +#endif + } + + return 0; +} + +static int +kni_dev_remove(struct kni_dev *dev) +{ + if (!dev) + return -ENODEV; + + switch (dev->device_id) { + #define RTE_PCI_DEV_ID_DECL_IGB(vend, dev) case (dev): + #include + igb_kni_remove(dev->pci_dev); + break; + #define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev) case (dev): + #include + ixgbe_kni_remove(dev->pci_dev); + break; + default: + break; + } + + if (dev->net_dev) { + unregister_netdev(dev->net_dev); + free_netdev(dev->net_dev); + } + + return 0; +} + +static int +kni_check_param(struct kni_dev *kni, struct rte_kni_device_info *dev) +{ + if (!kni || !dev) + return -1; + + /* Check if network name has been used */ + if (!strncmp(kni->name, dev->name, RTE_KNI_NAMESIZE)) { + KNI_ERR("KNI name %s duplicated\n", dev->name); + return -1; + } + + return 0; +} + +static int +kni_ioctl_create(struct net *net, + unsigned int ioctl_num, unsigned long ioctl_param) +{ + struct kni_net *knet = net_generic(net, kni_net_id); + int ret; + struct rte_kni_device_info dev_info; + struct pci_dev *pci = NULL; + struct pci_dev *found_pci = NULL; + struct net_device *net_dev = NULL; + struct net_device *lad_dev = NULL; + struct kni_dev *kni, *dev, *n; + + printk(KERN_INFO "KNI: Creating kni...\n"); + /* Check the buffer size, to avoid warning */ + if (_IOC_SIZE(ioctl_num) > sizeof(dev_info)) + return -EINVAL; + + /* Copy kni info from user space */ + ret = copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info)); + if (ret) { + KNI_ERR("copy_from_user in kni_ioctl_create"); + return -EIO; + } + + /** + * Check if the cpu core id is valid for binding, + * for multiple kernel thread mode. + */ + if (multiple_kthread_on && dev_info.force_bind && + !cpu_online(dev_info.core_id)) { + KNI_ERR("cpu %u is not online\n", dev_info.core_id); + return -EINVAL; + } + + /* Check if it has been created */ + down_read(&knet->kni_list_lock); + list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) { + if (kni_check_param(dev, &dev_info) < 0) { + up_read(&knet->kni_list_lock); + return -EINVAL; + } + } + up_read(&knet->kni_list_lock); + + net_dev = alloc_netdev(sizeof(struct kni_dev), dev_info.name, +#ifdef NET_NAME_UNKNOWN + NET_NAME_UNKNOWN, +#endif + kni_net_init); + if (net_dev == NULL) { + KNI_ERR("error allocating device \"%s\"\n", dev_info.name); + return -EBUSY; + } + + dev_net_set(net_dev, net); + + kni = netdev_priv(net_dev); + + kni->net_dev = net_dev; + kni->group_id = dev_info.group_id; + kni->core_id = dev_info.core_id; + strncpy(kni->name, dev_info.name, RTE_KNI_NAMESIZE); + + /* Translate user space info into kernel space info */ + kni->tx_q = phys_to_virt(dev_info.tx_phys); + kni->rx_q = phys_to_virt(dev_info.rx_phys); + kni->alloc_q = phys_to_virt(dev_info.alloc_phys); + kni->free_q = phys_to_virt(dev_info.free_phys); + + kni->req_q = phys_to_virt(dev_info.req_phys); + kni->resp_q = phys_to_virt(dev_info.resp_phys); + kni->sync_va = dev_info.sync_va; + kni->sync_kva = phys_to_virt(dev_info.sync_phys); + + kni->mbuf_kva = phys_to_virt(dev_info.mbuf_phys); + kni->mbuf_va = dev_info.mbuf_va; + +#ifdef RTE_KNI_VHOST + kni->vhost_queue = NULL; + kni->vq_status = BE_STOP; +#endif + kni->mbuf_size = dev_info.mbuf_size; + + KNI_PRINT("tx_phys: 0x%016llx, tx_q addr: 0x%p\n", + (unsigned long long) dev_info.tx_phys, kni->tx_q); + KNI_PRINT("rx_phys: 0x%016llx, rx_q addr: 0x%p\n", + (unsigned long long) dev_info.rx_phys, kni->rx_q); + KNI_PRINT("alloc_phys: 0x%016llx, alloc_q addr: 0x%p\n", + (unsigned long long) dev_info.alloc_phys, kni->alloc_q); + KNI_PRINT("free_phys: 0x%016llx, free_q addr: 0x%p\n", + (unsigned long long) dev_info.free_phys, kni->free_q); + KNI_PRINT("req_phys: 0x%016llx, req_q addr: 0x%p\n", + (unsigned long long) dev_info.req_phys, kni->req_q); + KNI_PRINT("resp_phys: 0x%016llx, resp_q addr: 0x%p\n", + (unsigned long long) dev_info.resp_phys, kni->resp_q); + KNI_PRINT("mbuf_phys: 0x%016llx, mbuf_kva: 0x%p\n", + (unsigned long long) dev_info.mbuf_phys, kni->mbuf_kva); + KNI_PRINT("mbuf_va: 0x%p\n", dev_info.mbuf_va); + KNI_PRINT("mbuf_size: %u\n", kni->mbuf_size); + + KNI_DBG("PCI: %02x:%02x.%02x %04x:%04x\n", + dev_info.bus, + dev_info.devid, + dev_info.function, + dev_info.vendor_id, + dev_info.device_id); + + pci = pci_get_device(dev_info.vendor_id, dev_info.device_id, NULL); + + /* Support Ethtool */ + while (pci) { + KNI_PRINT("pci_bus: %02x:%02x:%02x \n", + pci->bus->number, + PCI_SLOT(pci->devfn), + PCI_FUNC(pci->devfn)); + + if ((pci->bus->number == dev_info.bus) && + (PCI_SLOT(pci->devfn) == dev_info.devid) && + (PCI_FUNC(pci->devfn) == dev_info.function)) { + found_pci = pci; + switch (dev_info.device_id) { + #define RTE_PCI_DEV_ID_DECL_IGB(vend, dev) case (dev): + #include + ret = igb_kni_probe(found_pci, &lad_dev); + break; + #define RTE_PCI_DEV_ID_DECL_IXGBE(vend, dev) \ + case (dev): + #include + ret = ixgbe_kni_probe(found_pci, &lad_dev); + break; + default: + ret = -1; + break; + } + + KNI_DBG("PCI found: pci=0x%p, lad_dev=0x%p\n", + pci, lad_dev); + if (ret == 0) { + kni->lad_dev = lad_dev; + kni_set_ethtool_ops(kni->net_dev); + } else { + KNI_ERR("Device not supported by ethtool"); + kni->lad_dev = NULL; + } + + kni->pci_dev = found_pci; + kni->device_id = dev_info.device_id; + break; + } + pci = pci_get_device(dev_info.vendor_id, + dev_info.device_id, pci); + } + if (pci) + pci_dev_put(pci); + + ret = register_netdev(net_dev); + if (ret) { + KNI_ERR("error %i registering device \"%s\"\n", + ret, dev_info.name); + kni_dev_remove(kni); + return -ENODEV; + } + +#ifdef RTE_KNI_VHOST + kni_vhost_init(kni); +#endif + + /** + * Create a new kernel thread for multiple mode, set its core affinity, + * and finally wake it up. + */ + if (multiple_kthread_on) { + kni->pthread = kthread_create(kni_thread_multiple, + (void *)kni, + "kni_%s", kni->name); + if (IS_ERR(kni->pthread)) { + kni_dev_remove(kni); + return -ECANCELED; + } + if (dev_info.force_bind) + kthread_bind(kni->pthread, kni->core_id); + wake_up_process(kni->pthread); + } + + down_write(&knet->kni_list_lock); + list_add(&kni->list, &knet->kni_list_head); + up_write(&knet->kni_list_lock); + + return 0; +} + +static int +kni_ioctl_release(struct net *net, + unsigned int ioctl_num, unsigned long ioctl_param) +{ + struct kni_net *knet = net_generic(net, kni_net_id); + int ret = -EINVAL; + struct kni_dev *dev, *n; + struct rte_kni_device_info dev_info; + + if (_IOC_SIZE(ioctl_num) > sizeof(dev_info)) + return -EINVAL; + + ret = copy_from_user(&dev_info, (void *)ioctl_param, sizeof(dev_info)); + if (ret) { + KNI_ERR("copy_from_user in kni_ioctl_release"); + return -EIO; + } + + /* Release the network device according to its name */ + if (strlen(dev_info.name) == 0) + return ret; + + down_write(&knet->kni_list_lock); + list_for_each_entry_safe(dev, n, &knet->kni_list_head, list) { + if (strncmp(dev->name, dev_info.name, RTE_KNI_NAMESIZE) != 0) + continue; + + if (multiple_kthread_on && dev->pthread != NULL) { + kthread_stop(dev->pthread); + dev->pthread = NULL; + } + +#ifdef RTE_KNI_VHOST + kni_vhost_backend_release(dev); +#endif + kni_dev_remove(dev); + list_del(&dev->list); + ret = 0; + break; + } + up_write(&knet->kni_list_lock); + printk(KERN_INFO "KNI: %s release kni named %s\n", + (ret == 0 ? "Successfully" : "Unsuccessfully"), dev_info.name); + + return ret; +} + +static int +kni_ioctl(struct inode *inode, + unsigned int ioctl_num, + unsigned long ioctl_param) +{ + int ret = -EINVAL; + struct net *net = current->nsproxy->net_ns; + + KNI_DBG("IOCTL num=0x%0x param=0x%0lx\n", ioctl_num, ioctl_param); + + /* + * Switch according to the ioctl called + */ + switch (_IOC_NR(ioctl_num)) { + case _IOC_NR(RTE_KNI_IOCTL_TEST): + /* For test only, not used */ + break; + case _IOC_NR(RTE_KNI_IOCTL_CREATE): + ret = kni_ioctl_create(net, ioctl_num, ioctl_param); + break; + case _IOC_NR(RTE_KNI_IOCTL_RELEASE): + ret = kni_ioctl_release(net, ioctl_num, ioctl_param); + break; + default: + KNI_DBG("IOCTL default\n"); + break; + } + + return ret; +} + +static int +kni_compat_ioctl(struct inode *inode, + unsigned int ioctl_num, + unsigned long ioctl_param) +{ + /* 32 bits app on 64 bits OS to be supported later */ + KNI_PRINT("Not implemented.\n"); + + return -EINVAL; +} + +module_init(kni_init); +module_exit(kni_exit); + +module_param(lo_mode, charp, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(lo_mode, +"KNI loopback mode (default=lo_mode_none):\n" +" lo_mode_none Kernel loopback disabled\n" +" lo_mode_fifo Enable kernel loopback with fifo\n" +" lo_mode_fifo_skb Enable kernel loopback with fifo and skb buffer\n" +"\n" +); + +module_param(kthread_mode, charp, S_IRUGO); +MODULE_PARM_DESC(kthread_mode, +"Kernel thread mode (default=single):\n" +" single Single kernel thread mode enabled.\n" +" multiple Multiple kernel thread mode enabled.\n" +"\n" +); diff --git a/lib/librte_eal/linuxapp/kni/kni_net.c b/lib/librte_eal/linuxapp/kni/kni_net.c new file mode 100644 index 00000000..cfa83398 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/kni_net.c @@ -0,0 +1,706 @@ +/*- + * GPL LICENSE SUMMARY + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * The full GNU General Public License is included in this distribution + * in the file called LICENSE.GPL. + * + * Contact Information: + * Intel Corporation + */ + +/* + * This code is inspired from the book "Linux Device Drivers" by + * Alessandro Rubini and Jonathan Corbet, published by O'Reilly & Associates + */ + +#include +#include +#include +#include +#include /* eth_type_trans */ +#include +#include +#include + +#include +#include +#include "kni_dev.h" + +#define WD_TIMEOUT 5 /*jiffies */ + +#define MBUF_BURST_SZ 32 + +#define KNI_WAIT_RESPONSE_TIMEOUT 300 /* 3 seconds */ + +/* typedef for rx function */ +typedef void (*kni_net_rx_t)(struct kni_dev *kni); + +static int kni_net_tx(struct sk_buff *skb, struct net_device *dev); +static void kni_net_rx_normal(struct kni_dev *kni); +static void kni_net_rx_lo_fifo(struct kni_dev *kni); +static void kni_net_rx_lo_fifo_skb(struct kni_dev *kni); +static int kni_net_process_request(struct kni_dev *kni, + struct rte_kni_request *req); + +/* kni rx function pointer, with default to normal rx */ +static kni_net_rx_t kni_net_rx_func = kni_net_rx_normal; + +/* + * Open and close + */ +static int +kni_net_open(struct net_device *dev) +{ + int ret; + struct rte_kni_request req; + struct kni_dev *kni = netdev_priv(dev); + + if (kni->lad_dev) + memcpy(dev->dev_addr, kni->lad_dev->dev_addr, ETH_ALEN); + else + /* + * Generate random mac address. eth_random_addr() is the newer + * version of generating mac address in linux kernel. + */ + random_ether_addr(dev->dev_addr); + + netif_start_queue(dev); + + memset(&req, 0, sizeof(req)); + req.req_id = RTE_KNI_REQ_CFG_NETWORK_IF; + + /* Setting if_up to non-zero means up */ + req.if_up = 1; + ret = kni_net_process_request(kni, &req); + + return (ret == 0) ? req.result : ret; +} + +static int +kni_net_release(struct net_device *dev) +{ + int ret; + struct rte_kni_request req; + struct kni_dev *kni = netdev_priv(dev); + + netif_stop_queue(dev); /* can't transmit any more */ + + memset(&req, 0, sizeof(req)); + req.req_id = RTE_KNI_REQ_CFG_NETWORK_IF; + + /* Setting if_up to 0 means down */ + req.if_up = 0; + ret = kni_net_process_request(kni, &req); + + return (ret == 0) ? req.result : ret; +} + +/* + * Configuration changes (passed on by ifconfig) + */ +static int +kni_net_config(struct net_device *dev, struct ifmap *map) +{ + if (dev->flags & IFF_UP) /* can't act on a running interface */ + return -EBUSY; + + /* ignore other fields */ + return 0; +} + +/* + * RX: normal working mode + */ +static void +kni_net_rx_normal(struct kni_dev *kni) +{ + unsigned ret; + uint32_t len; + unsigned i, num_rx, num_fq; + struct rte_kni_mbuf *kva; + struct rte_kni_mbuf *va[MBUF_BURST_SZ]; + void * data_kva; + + struct sk_buff *skb; + struct net_device *dev = kni->net_dev; + + /* Get the number of free entries in free_q */ + num_fq = kni_fifo_free_count(kni->free_q); + if (num_fq == 0) { + /* No room on the free_q, bail out */ + return; + } + + /* Calculate the number of entries to dequeue from rx_q */ + num_rx = min(num_fq, (unsigned)MBUF_BURST_SZ); + + /* Burst dequeue from rx_q */ + num_rx = kni_fifo_get(kni->rx_q, (void **)va, num_rx); + if (num_rx == 0) + return; + + /* Transfer received packets to netif */ + for (i = 0; i < num_rx; i++) { + kva = (void *)va[i] - kni->mbuf_va + kni->mbuf_kva; + len = kva->data_len; + data_kva = kva->buf_addr + kva->data_off - kni->mbuf_va + + kni->mbuf_kva; + + skb = dev_alloc_skb(len + 2); + if (!skb) { + KNI_ERR("Out of mem, dropping pkts\n"); + /* Update statistics */ + kni->stats.rx_dropped++; + } + else { + /* Align IP on 16B boundary */ + skb_reserve(skb, 2); + memcpy(skb_put(skb, len), data_kva, len); + skb->dev = dev; + skb->protocol = eth_type_trans(skb, dev); + skb->ip_summed = CHECKSUM_UNNECESSARY; + + /* Call netif interface */ + netif_rx_ni(skb); + + /* Update statistics */ + kni->stats.rx_bytes += len; + kni->stats.rx_packets++; + } + } + + /* Burst enqueue mbufs into free_q */ + ret = kni_fifo_put(kni->free_q, (void **)va, num_rx); + if (ret != num_rx) + /* Failing should not happen */ + KNI_ERR("Fail to enqueue entries into free_q\n"); +} + +/* + * RX: loopback with enqueue/dequeue fifos. + */ +static void +kni_net_rx_lo_fifo(struct kni_dev *kni) +{ + unsigned ret; + uint32_t len; + unsigned i, num, num_rq, num_tq, num_aq, num_fq; + struct rte_kni_mbuf *kva; + struct rte_kni_mbuf *va[MBUF_BURST_SZ]; + void * data_kva; + + struct rte_kni_mbuf *alloc_kva; + struct rte_kni_mbuf *alloc_va[MBUF_BURST_SZ]; + void *alloc_data_kva; + + /* Get the number of entries in rx_q */ + num_rq = kni_fifo_count(kni->rx_q); + + /* Get the number of free entrie in tx_q */ + num_tq = kni_fifo_free_count(kni->tx_q); + + /* Get the number of entries in alloc_q */ + num_aq = kni_fifo_count(kni->alloc_q); + + /* Get the number of free entries in free_q */ + num_fq = kni_fifo_free_count(kni->free_q); + + /* Calculate the number of entries to be dequeued from rx_q */ + num = min(num_rq, num_tq); + num = min(num, num_aq); + num = min(num, num_fq); + num = min(num, (unsigned)MBUF_BURST_SZ); + + /* Return if no entry to dequeue from rx_q */ + if (num == 0) + return; + + /* Burst dequeue from rx_q */ + ret = kni_fifo_get(kni->rx_q, (void **)va, num); + if (ret == 0) + return; /* Failing should not happen */ + + /* Dequeue entries from alloc_q */ + ret = kni_fifo_get(kni->alloc_q, (void **)alloc_va, num); + if (ret) { + num = ret; + /* Copy mbufs */ + for (i = 0; i < num; i++) { + kva = (void *)va[i] - kni->mbuf_va + kni->mbuf_kva; + len = kva->pkt_len; + data_kva = kva->buf_addr + kva->data_off - + kni->mbuf_va + kni->mbuf_kva; + + alloc_kva = (void *)alloc_va[i] - kni->mbuf_va + + kni->mbuf_kva; + alloc_data_kva = alloc_kva->buf_addr + + alloc_kva->data_off - kni->mbuf_va + + kni->mbuf_kva; + memcpy(alloc_data_kva, data_kva, len); + alloc_kva->pkt_len = len; + alloc_kva->data_len = len; + + kni->stats.tx_bytes += len; + kni->stats.rx_bytes += len; + } + + /* Burst enqueue mbufs into tx_q */ + ret = kni_fifo_put(kni->tx_q, (void **)alloc_va, num); + if (ret != num) + /* Failing should not happen */ + KNI_ERR("Fail to enqueue mbufs into tx_q\n"); + } + + /* Burst enqueue mbufs into free_q */ + ret = kni_fifo_put(kni->free_q, (void **)va, num); + if (ret != num) + /* Failing should not happen */ + KNI_ERR("Fail to enqueue mbufs into free_q\n"); + + /** + * Update statistic, and enqueue/dequeue failure is impossible, + * as all queues are checked at first. + */ + kni->stats.tx_packets += num; + kni->stats.rx_packets += num; +} + +/* + * RX: loopback with enqueue/dequeue fifos and sk buffer copies. + */ +static void +kni_net_rx_lo_fifo_skb(struct kni_dev *kni) +{ + unsigned ret; + uint32_t len; + unsigned i, num_rq, num_fq, num; + struct rte_kni_mbuf *kva; + struct rte_kni_mbuf *va[MBUF_BURST_SZ]; + void * data_kva; + + struct sk_buff *skb; + struct net_device *dev = kni->net_dev; + + /* Get the number of entries in rx_q */ + num_rq = kni_fifo_count(kni->rx_q); + + /* Get the number of free entries in free_q */ + num_fq = kni_fifo_free_count(kni->free_q); + + /* Calculate the number of entries to dequeue from rx_q */ + num = min(num_rq, num_fq); + num = min(num, (unsigned)MBUF_BURST_SZ); + + /* Return if no entry to dequeue from rx_q */ + if (num == 0) + return; + + /* Burst dequeue mbufs from rx_q */ + ret = kni_fifo_get(kni->rx_q, (void **)va, num); + if (ret == 0) + return; + + /* Copy mbufs to sk buffer and then call tx interface */ + for (i = 0; i < num; i++) { + kva = (void *)va[i] - kni->mbuf_va + kni->mbuf_kva; + len = kva->data_len; + data_kva = kva->buf_addr + kva->data_off - kni->mbuf_va + + kni->mbuf_kva; + + skb = dev_alloc_skb(len + 2); + if (skb == NULL) + KNI_ERR("Out of mem, dropping pkts\n"); + else { + /* Align IP on 16B boundary */ + skb_reserve(skb, 2); + memcpy(skb_put(skb, len), data_kva, len); + skb->dev = dev; + skb->ip_summed = CHECKSUM_UNNECESSARY; + dev_kfree_skb(skb); + } + + /* Simulate real usage, allocate/copy skb twice */ + skb = dev_alloc_skb(len + 2); + if (skb == NULL) { + KNI_ERR("Out of mem, dropping pkts\n"); + kni->stats.rx_dropped++; + } + else { + /* Align IP on 16B boundary */ + skb_reserve(skb, 2); + memcpy(skb_put(skb, len), data_kva, len); + skb->dev = dev; + skb->ip_summed = CHECKSUM_UNNECESSARY; + + kni->stats.rx_bytes += len; + kni->stats.rx_packets++; + + /* call tx interface */ + kni_net_tx(skb, dev); + } + } + + /* enqueue all the mbufs from rx_q into free_q */ + ret = kni_fifo_put(kni->free_q, (void **)&va, num); + if (ret != num) + /* Failing should not happen */ + KNI_ERR("Fail to enqueue mbufs into free_q\n"); +} + +/* rx interface */ +void +kni_net_rx(struct kni_dev *kni) +{ + /** + * It doesn't need to check if it is NULL pointer, + * as it has a default value + */ + (*kni_net_rx_func)(kni); +} + +/* + * Transmit a packet (called by the kernel) + */ +#ifdef RTE_KNI_VHOST +static int +kni_net_tx(struct sk_buff *skb, struct net_device *dev) +{ + struct kni_dev *kni = netdev_priv(dev); + + dev_kfree_skb(skb); + kni->stats.tx_dropped++; + + return NETDEV_TX_OK; +} +#else +static int +kni_net_tx(struct sk_buff *skb, struct net_device *dev) +{ + int len = 0; + unsigned ret; + struct kni_dev *kni = netdev_priv(dev); + struct rte_kni_mbuf *pkt_kva = NULL; + struct rte_kni_mbuf *pkt_va = NULL; + + dev->trans_start = jiffies; /* save the timestamp */ + + /* Check if the length of skb is less than mbuf size */ + if (skb->len > kni->mbuf_size) + goto drop; + + /** + * Check if it has at least one free entry in tx_q and + * one entry in alloc_q. + */ + if (kni_fifo_free_count(kni->tx_q) == 0 || + kni_fifo_count(kni->alloc_q) == 0) { + /** + * If no free entry in tx_q or no entry in alloc_q, + * drops skb and goes out. + */ + goto drop; + } + + /* dequeue a mbuf from alloc_q */ + ret = kni_fifo_get(kni->alloc_q, (void **)&pkt_va, 1); + if (likely(ret == 1)) { + void *data_kva; + + pkt_kva = (void *)pkt_va - kni->mbuf_va + kni->mbuf_kva; + data_kva = pkt_kva->buf_addr + pkt_kva->data_off - kni->mbuf_va + + kni->mbuf_kva; + + len = skb->len; + memcpy(data_kva, skb->data, len); + if (unlikely(len < ETH_ZLEN)) { + memset(data_kva + len, 0, ETH_ZLEN - len); + len = ETH_ZLEN; + } + pkt_kva->pkt_len = len; + pkt_kva->data_len = len; + + /* enqueue mbuf into tx_q */ + ret = kni_fifo_put(kni->tx_q, (void **)&pkt_va, 1); + if (unlikely(ret != 1)) { + /* Failing should not happen */ + KNI_ERR("Fail to enqueue mbuf into tx_q\n"); + goto drop; + } + } else { + /* Failing should not happen */ + KNI_ERR("Fail to dequeue mbuf from alloc_q\n"); + goto drop; + } + + /* Free skb and update statistics */ + dev_kfree_skb(skb); + kni->stats.tx_bytes += len; + kni->stats.tx_packets++; + + return NETDEV_TX_OK; + +drop: + /* Free skb and update statistics */ + dev_kfree_skb(skb); + kni->stats.tx_dropped++; + + return NETDEV_TX_OK; +} +#endif + +/* + * Deal with a transmit timeout. + */ +static void +kni_net_tx_timeout (struct net_device *dev) +{ + struct kni_dev *kni = netdev_priv(dev); + + KNI_DBG("Transmit timeout at %ld, latency %ld\n", jiffies, + jiffies - dev->trans_start); + + kni->stats.tx_errors++; + netif_wake_queue(dev); + return; +} + +/* + * Ioctl commands + */ +static int +kni_net_ioctl(struct net_device *dev, struct ifreq *rq, int cmd) +{ + KNI_DBG("kni_net_ioctl %d\n", + ((struct kni_dev *)netdev_priv(dev))->group_id); + + return 0; +} + +static void +kni_net_set_rx_mode(struct net_device *dev) +{ +} + +static int +kni_net_change_mtu(struct net_device *dev, int new_mtu) +{ + int ret; + struct rte_kni_request req; + struct kni_dev *kni = netdev_priv(dev); + + KNI_DBG("kni_net_change_mtu new mtu %d to be set\n", new_mtu); + + memset(&req, 0, sizeof(req)); + req.req_id = RTE_KNI_REQ_CHANGE_MTU; + req.new_mtu = new_mtu; + ret = kni_net_process_request(kni, &req); + if (ret == 0 && req.result == 0) + dev->mtu = new_mtu; + + return (ret == 0) ? req.result : ret; +} + +/* + * Checks if the user space application provided the resp message + */ +void +kni_net_poll_resp(struct kni_dev *kni) +{ + if (kni_fifo_count(kni->resp_q)) + wake_up_interruptible(&kni->wq); +} + +/* + * It can be called to process the request. + */ +static int +kni_net_process_request(struct kni_dev *kni, struct rte_kni_request *req) +{ + int ret = -1; + void *resp_va; + unsigned num; + int ret_val; + + if (!kni || !req) { + KNI_ERR("No kni instance or request\n"); + return -EINVAL; + } + + mutex_lock(&kni->sync_lock); + + /* Construct data */ + memcpy(kni->sync_kva, req, sizeof(struct rte_kni_request)); + num = kni_fifo_put(kni->req_q, &kni->sync_va, 1); + if (num < 1) { + KNI_ERR("Cannot send to req_q\n"); + ret = -EBUSY; + goto fail; + } + + ret_val = wait_event_interruptible_timeout(kni->wq, + kni_fifo_count(kni->resp_q), 3 * HZ); + if (signal_pending(current) || ret_val <= 0) { + ret = -ETIME; + goto fail; + } + num = kni_fifo_get(kni->resp_q, (void **)&resp_va, 1); + if (num != 1 || resp_va != kni->sync_va) { + /* This should never happen */ + KNI_ERR("No data in resp_q\n"); + ret = -ENODATA; + goto fail; + } + + memcpy(req, kni->sync_kva, sizeof(struct rte_kni_request)); + ret = 0; + +fail: + mutex_unlock(&kni->sync_lock); + return ret; +} + +/* + * Return statistics to the caller + */ +static struct net_device_stats * +kni_net_stats(struct net_device *dev) +{ + struct kni_dev *kni = netdev_priv(dev); + return &kni->stats; +} + +/* + * Fill the eth header + */ +static int +kni_net_header(struct sk_buff *skb, struct net_device *dev, + unsigned short type, const void *daddr, + const void *saddr, unsigned int len) +{ + struct ethhdr *eth = (struct ethhdr *) skb_push(skb, ETH_HLEN); + + memcpy(eth->h_source, saddr ? saddr : dev->dev_addr, dev->addr_len); + memcpy(eth->h_dest, daddr ? daddr : dev->dev_addr, dev->addr_len); + eth->h_proto = htons(type); + + return dev->hard_header_len; +} + + +/* + * Re-fill the eth header + */ +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 1, 0)) +static int +kni_net_rebuild_header(struct sk_buff *skb) +{ + struct net_device *dev = skb->dev; + struct ethhdr *eth = (struct ethhdr *) skb->data; + + memcpy(eth->h_source, dev->dev_addr, dev->addr_len); + memcpy(eth->h_dest, dev->dev_addr, dev->addr_len); + + return 0; +} +#endif /* < 4.1.0 */ + +/** + * kni_net_set_mac - Change the Ethernet Address of the KNI NIC + * @netdev: network interface device structure + * @p: pointer to an address structure + * + * Returns 0 on success, negative on failure + **/ +static int kni_net_set_mac(struct net_device *netdev, void *p) +{ + struct sockaddr *addr = p; + if (!is_valid_ether_addr((unsigned char *)(addr->sa_data))) + return -EADDRNOTAVAIL; + memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); + return 0; +} + +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)) +static int kni_net_change_carrier(struct net_device *dev, bool new_carrier) +{ + if (new_carrier) + netif_carrier_on(dev); + else + netif_carrier_off(dev); + return 0; +} +#endif + +static const struct header_ops kni_net_header_ops = { + .create = kni_net_header, +#if (LINUX_VERSION_CODE < KERNEL_VERSION(4, 1, 0)) + .rebuild = kni_net_rebuild_header, +#endif /* < 4.1.0 */ + .cache = NULL, /* disable caching */ +}; + +static const struct net_device_ops kni_net_netdev_ops = { + .ndo_open = kni_net_open, + .ndo_stop = kni_net_release, + .ndo_set_config = kni_net_config, + .ndo_start_xmit = kni_net_tx, + .ndo_change_mtu = kni_net_change_mtu, + .ndo_do_ioctl = kni_net_ioctl, + .ndo_set_rx_mode = kni_net_set_rx_mode, + .ndo_get_stats = kni_net_stats, + .ndo_tx_timeout = kni_net_tx_timeout, + .ndo_set_mac_address = kni_net_set_mac, +#if (LINUX_VERSION_CODE >= KERNEL_VERSION(3, 9, 0)) + .ndo_change_carrier = kni_net_change_carrier, +#endif +}; + +void +kni_net_init(struct net_device *dev) +{ + struct kni_dev *kni = netdev_priv(dev); + + KNI_DBG("kni_net_init\n"); + + init_waitqueue_head(&kni->wq); + mutex_init(&kni->sync_lock); + + ether_setup(dev); /* assign some of the fields */ + dev->netdev_ops = &kni_net_netdev_ops; + dev->header_ops = &kni_net_header_ops; + dev->watchdog_timeo = WD_TIMEOUT; +} + +void +kni_net_config_lo_mode(char *lo_str) +{ + if (!lo_str) { + KNI_PRINT("loopback disabled"); + return; + } + + if (!strcmp(lo_str, "lo_mode_none")) + KNI_PRINT("loopback disabled"); + else if (!strcmp(lo_str, "lo_mode_fifo")) { + KNI_PRINT("loopback mode=lo_mode_fifo enabled"); + kni_net_rx_func = kni_net_rx_lo_fifo; + } else if (!strcmp(lo_str, "lo_mode_fifo_skb")) { + KNI_PRINT("loopback mode=lo_mode_fifo_skb enabled"); + kni_net_rx_func = kni_net_rx_lo_fifo_skb; + } else + KNI_PRINT("Incognizant parameter, loopback disabled"); +} diff --git a/lib/librte_eal/linuxapp/kni/kni_vhost.c b/lib/librte_eal/linuxapp/kni/kni_vhost.c new file mode 100644 index 00000000..a3ca8499 --- /dev/null +++ b/lib/librte_eal/linuxapp/kni/kni_vhost.c @@ -0,0 +1,857 @@ +/*- + * GPL LICENSE SUMMARY + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * The full GNU General Public License is included in this distribution + * in the file called LICENSE.GPL. + * + * Contact Information: + * Intel Corporation + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "compat.h" +#include "kni_dev.h" +#include "kni_fifo.h" + +#define RX_BURST_SZ 4 + +extern void put_unused_fd(unsigned int fd); + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3,7,0) +extern struct file* +sock_alloc_file(struct socket *sock, + int flags, const char *dname); + +extern int get_unused_fd_flags(unsigned flags); + +extern void fd_install(unsigned int fd, struct file *file); + +static int kni_sock_map_fd(struct socket *sock) +{ + struct file *file; + int fd = get_unused_fd_flags(0); + if (fd < 0) + return fd; + + file = sock_alloc_file(sock, 0, NULL); + if (IS_ERR(file)) { + put_unused_fd(fd); + return PTR_ERR(file); + } + fd_install(fd, file); + return fd; +} +#else +#define kni_sock_map_fd(s) sock_map_fd(s, 0) +#endif + +static struct proto kni_raw_proto = { + .name = "kni_vhost", + .owner = THIS_MODULE, + .obj_size = sizeof(struct kni_vhost_queue), +}; + +static inline int +kni_vhost_net_tx(struct kni_dev *kni, struct msghdr *m, + unsigned offset, unsigned len) +{ + struct rte_kni_mbuf *pkt_kva = NULL; + struct rte_kni_mbuf *pkt_va = NULL; + int ret; + + KNI_DBG_TX("tx offset=%d, len=%d, iovlen=%d\n", +#ifdef HAVE_IOV_ITER_MSGHDR + offset, len, (int)m->msg_iter.iov->iov_len); +#else + offset, len, (int)m->msg_iov->iov_len); +#endif + + /** + * Check if it has at least one free entry in tx_q and + * one entry in alloc_q. + */ + if (kni_fifo_free_count(kni->tx_q) == 0 || + kni_fifo_count(kni->alloc_q) == 0) { + /** + * If no free entry in tx_q or no entry in alloc_q, + * drops skb and goes out. + */ + goto drop; + } + + /* dequeue a mbuf from alloc_q */ + ret = kni_fifo_get(kni->alloc_q, (void **)&pkt_va, 1); + if (likely(ret == 1)) { + void *data_kva; + + pkt_kva = (void *)pkt_va - kni->mbuf_va + kni->mbuf_kva; + data_kva = pkt_kva->buf_addr + pkt_kva->data_off + - kni->mbuf_va + kni->mbuf_kva; + +#ifdef HAVE_IOV_ITER_MSGHDR + copy_from_iter(data_kva, len, &m->msg_iter); +#else + memcpy_fromiovecend(data_kva, m->msg_iov, offset, len); +#endif + + if (unlikely(len < ETH_ZLEN)) { + memset(data_kva + len, 0, ETH_ZLEN - len); + len = ETH_ZLEN; + } + pkt_kva->pkt_len = len; + pkt_kva->data_len = len; + + /* enqueue mbuf into tx_q */ + ret = kni_fifo_put(kni->tx_q, (void **)&pkt_va, 1); + if (unlikely(ret != 1)) { + /* Failing should not happen */ + KNI_ERR("Fail to enqueue mbuf into tx_q\n"); + goto drop; + } + } else { + /* Failing should not happen */ + KNI_ERR("Fail to dequeue mbuf from alloc_q\n"); + goto drop; + } + + /* update statistics */ + kni->stats.tx_bytes += len; + kni->stats.tx_packets++; + + return 0; + +drop: + /* update statistics */ + kni->stats.tx_dropped++; + + return 0; +} + +static inline int +kni_vhost_net_rx(struct kni_dev *kni, struct msghdr *m, + unsigned offset, unsigned len) +{ + uint32_t pkt_len; + struct rte_kni_mbuf *kva; + struct rte_kni_mbuf *va; + void * data_kva; + struct sk_buff *skb; + struct kni_vhost_queue *q = kni->vhost_queue; + + if (unlikely(q == NULL)) + return 0; + + /* ensure at least one entry in free_q */ + if (unlikely(kni_fifo_free_count(kni->free_q) == 0)) + return 0; + + skb = skb_dequeue(&q->sk.sk_receive_queue); + if (unlikely(skb == NULL)) + return 0; + + kva = (struct rte_kni_mbuf*)skb->data; + + /* free skb to cache */ + skb->data = NULL; + if (unlikely(1 != kni_fifo_put(q->fifo, (void **)&skb, 1))) + /* Failing should not happen */ + KNI_ERR("Fail to enqueue entries into rx cache fifo\n"); + + pkt_len = kva->data_len; + if (unlikely(pkt_len > len)) + goto drop; + + KNI_DBG_RX("rx offset=%d, len=%d, pkt_len=%d, iovlen=%d\n", +#ifdef HAVE_IOV_ITER_MSGHDR + offset, len, pkt_len, (int)m->msg_iter.iov->iov_len); +#else + offset, len, pkt_len, (int)m->msg_iov->iov_len); +#endif + + data_kva = kva->buf_addr + kva->data_off - kni->mbuf_va + kni->mbuf_kva; +#ifdef HAVE_IOV_ITER_MSGHDR + if (unlikely(copy_to_iter(data_kva, pkt_len, &m->msg_iter))) +#else + if (unlikely(memcpy_toiovecend(m->msg_iov, data_kva, offset, pkt_len))) +#endif + goto drop; + + /* Update statistics */ + kni->stats.rx_bytes += pkt_len; + kni->stats.rx_packets++; + + /* enqueue mbufs into free_q */ + va = (void*)kva - kni->mbuf_kva + kni->mbuf_va; + if (unlikely(1 != kni_fifo_put(kni->free_q, (void **)&va, 1))) + /* Failing should not happen */ + KNI_ERR("Fail to enqueue entries into free_q\n"); + + KNI_DBG_RX("receive done %d\n", pkt_len); + + return pkt_len; + +drop: + /* Update drop statistics */ + kni->stats.rx_dropped++; + + return 0; +} + +static unsigned int +kni_sock_poll(struct file *file, struct socket *sock, poll_table * wait) +{ + struct kni_vhost_queue *q = + container_of(sock->sk, struct kni_vhost_queue, sk); + struct kni_dev *kni; + unsigned int mask = 0; + + if (unlikely(q == NULL || q->kni == NULL)) + return POLLERR; + + kni = q->kni; +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35) + KNI_DBG("start kni_poll on group %d, wq 0x%16llx\n", + kni->group_id, (uint64_t)sock->wq); +#else + KNI_DBG("start kni_poll on group %d, wait at 0x%16llx\n", + kni->group_id, (uint64_t)&sock->wait); +#endif + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35) + poll_wait(file, &sock->wq->wait, wait); +#else + poll_wait(file, &sock->wait, wait); +#endif + + if (kni_fifo_count(kni->rx_q) > 0) + mask |= POLLIN | POLLRDNORM; + + if (sock_writeable(&q->sk) || +#ifdef SOCKWQ_ASYNC_NOSPACE + (!test_and_set_bit(SOCKWQ_ASYNC_NOSPACE, &q->sock->flags) && +#else + (!test_and_set_bit(SOCK_ASYNC_NOSPACE, &q->sock->flags) && +#endif + sock_writeable(&q->sk))) + mask |= POLLOUT | POLLWRNORM; + + return mask; +} + +static inline void +kni_vhost_enqueue(struct kni_dev *kni, struct kni_vhost_queue *q, + struct sk_buff *skb, struct rte_kni_mbuf *va) +{ + struct rte_kni_mbuf *kva; + + kva = (void *)(va) - kni->mbuf_va + kni->mbuf_kva; + (skb)->data = (unsigned char*)kva; + (skb)->len = kva->data_len; + skb_queue_tail(&q->sk.sk_receive_queue, skb); +} + +static inline void +kni_vhost_enqueue_burst(struct kni_dev *kni, struct kni_vhost_queue *q, + struct sk_buff **skb, struct rte_kni_mbuf **va) +{ + int i; + for (i = 0; i < RX_BURST_SZ; skb++, va++, i++) + kni_vhost_enqueue(kni, q, *skb, *va); +} + +int +kni_chk_vhost_rx(struct kni_dev *kni) +{ + struct kni_vhost_queue *q = kni->vhost_queue; + unsigned nb_in, nb_mbuf, nb_skb; + const unsigned BURST_MASK = RX_BURST_SZ - 1; + unsigned nb_burst, nb_backlog, i; + struct sk_buff *skb[RX_BURST_SZ]; + struct rte_kni_mbuf *va[RX_BURST_SZ]; + + if (unlikely(BE_STOP & kni->vq_status)) { + kni->vq_status |= BE_FINISH; + return 0; + } + + if (unlikely(q == NULL)) + return 0; + + nb_skb = kni_fifo_count(q->fifo); + nb_mbuf = kni_fifo_count(kni->rx_q); + + nb_in = min(nb_mbuf, nb_skb); + nb_in = min(nb_in, (unsigned)RX_BURST_SZ); + nb_burst = (nb_in & ~BURST_MASK); + nb_backlog = (nb_in & BURST_MASK); + + /* enqueue skb_queue per BURST_SIZE bulk */ + if (0 != nb_burst) { + if (unlikely(RX_BURST_SZ != kni_fifo_get( + kni->rx_q, (void **)&va, + RX_BURST_SZ))) + goto except; + + if (unlikely(RX_BURST_SZ != kni_fifo_get( + q->fifo, (void **)&skb, + RX_BURST_SZ))) + goto except; + + kni_vhost_enqueue_burst(kni, q, skb, va); + } + + /* all leftover, do one by one */ + for (i = 0; i < nb_backlog; ++i) { + if (unlikely(1 != kni_fifo_get( + kni->rx_q,(void **)&va, 1))) + goto except; + + if (unlikely(1 != kni_fifo_get( + q->fifo, (void **)&skb, 1))) + goto except; + + kni_vhost_enqueue(kni, q, *skb, *va); + } + + /* Ondemand wake up */ + if ((nb_in == RX_BURST_SZ) || (nb_skb == 0) || + ((nb_mbuf < RX_BURST_SZ) && (nb_mbuf != 0))) { + wake_up_interruptible_poll(sk_sleep(&q->sk), + POLLIN | POLLRDNORM | POLLRDBAND); + KNI_DBG_RX("RX CHK KICK nb_mbuf %d, nb_skb %d, nb_in %d\n", + nb_mbuf, nb_skb, nb_in); + } + + return 0; + +except: + /* Failing should not happen */ + KNI_ERR("Fail to enqueue fifo, it shouldn't happen \n"); + BUG_ON(1); + + return 0; +} + +static int +#ifdef HAVE_KIOCB_MSG_PARAM +kni_sock_sndmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *m, size_t total_len) +#else +kni_sock_sndmsg(struct socket *sock, + struct msghdr *m, size_t total_len) +#endif /* HAVE_KIOCB_MSG_PARAM */ +{ + struct kni_vhost_queue *q = + container_of(sock->sk, struct kni_vhost_queue, sk); + int vnet_hdr_len = 0; + unsigned long len = total_len; + + if (unlikely(q == NULL || q->kni == NULL)) + return 0; + + KNI_DBG_TX("kni_sndmsg len %ld, flags 0x%08x, nb_iov %d\n", +#ifdef HAVE_IOV_ITER_MSGHDR + len, q->flags, (int)m->msg_iter.iov->iov_len); +#else + len, q->flags, (int)m->msg_iovlen); +#endif + +#ifdef RTE_KNI_VHOST_VNET_HDR_EN + if (likely(q->flags & IFF_VNET_HDR)) { + vnet_hdr_len = q->vnet_hdr_sz; + if (unlikely(len < vnet_hdr_len)) + return -EINVAL; + len -= vnet_hdr_len; + } +#endif + + if (unlikely(len < ETH_HLEN + q->vnet_hdr_sz)) + return -EINVAL; + + return kni_vhost_net_tx(q->kni, m, vnet_hdr_len, len); +} + +static int +#ifdef HAVE_KIOCB_MSG_PARAM +kni_sock_rcvmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *m, size_t len, int flags) +#else +kni_sock_rcvmsg(struct socket *sock, + struct msghdr *m, size_t len, int flags) +#endif /* HAVE_KIOCB_MSG_PARAM */ +{ + int vnet_hdr_len = 0; + int pkt_len = 0; + struct kni_vhost_queue *q = + container_of(sock->sk, struct kni_vhost_queue, sk); + static struct virtio_net_hdr + __attribute__ ((unused)) vnet_hdr = { + .flags = 0, + .gso_type = VIRTIO_NET_HDR_GSO_NONE + }; + + if (unlikely(q == NULL || q->kni == NULL)) + return 0; + +#ifdef RTE_KNI_VHOST_VNET_HDR_EN + if (likely(q->flags & IFF_VNET_HDR)) { + vnet_hdr_len = q->vnet_hdr_sz; + if ((len -= vnet_hdr_len) < 0) + return -EINVAL; + } +#endif + + if (unlikely(0 == (pkt_len = kni_vhost_net_rx(q->kni, + m, vnet_hdr_len, len)))) + return 0; + +#ifdef RTE_KNI_VHOST_VNET_HDR_EN + /* no need to copy hdr when no pkt received */ +#ifdef HAVE_IOV_ITER_MSGHDR + if (unlikely(copy_to_iter((void *)&vnet_hdr, vnet_hdr_len, + &m->msg_iter))) +#else + if (unlikely(memcpy_toiovecend(m->msg_iov, + (void *)&vnet_hdr, 0, vnet_hdr_len))) +#endif /* HAVE_IOV_ITER_MSGHDR */ + return -EFAULT; +#endif /* RTE_KNI_VHOST_VNET_HDR_EN */ + KNI_DBG_RX("kni_rcvmsg expect_len %ld, flags 0x%08x, pkt_len %d\n", + (unsigned long)len, q->flags, pkt_len); + + return pkt_len + vnet_hdr_len; +} + +/* dummy tap like ioctl */ +static int +kni_sock_ioctl(struct socket *sock, unsigned int cmd, + unsigned long arg) +{ + void __user *argp = (void __user *)arg; + struct ifreq __user *ifr = argp; + unsigned int __user *up = argp; + struct kni_vhost_queue *q = + container_of(sock->sk, struct kni_vhost_queue, sk); + struct kni_dev *kni; + unsigned int u; + int __user *sp = argp; + int s; + int ret; + + KNI_DBG("tap ioctl cmd 0x%08x\n", cmd); + + switch (cmd) { + case TUNSETIFF: + KNI_DBG("TUNSETIFF\n"); + /* ignore the name, just look at flags */ + if (get_user(u, &ifr->ifr_flags)) + return -EFAULT; + + ret = 0; + if ((u & ~IFF_VNET_HDR) != (IFF_NO_PI | IFF_TAP)) + ret = -EINVAL; + else + q->flags = u; + + return ret; + + case TUNGETIFF: + KNI_DBG("TUNGETIFF\n"); + rcu_read_lock_bh(); + kni = rcu_dereference_bh(q->kni); + if (kni) + dev_hold(kni->net_dev); + rcu_read_unlock_bh(); + + if (!kni) + return -ENOLINK; + + ret = 0; + if (copy_to_user(&ifr->ifr_name, kni->net_dev->name, IFNAMSIZ) || + put_user(q->flags, &ifr->ifr_flags)) + ret = -EFAULT; + dev_put(kni->net_dev); + return ret; + + case TUNGETFEATURES: + KNI_DBG("TUNGETFEATURES\n"); + u = IFF_TAP | IFF_NO_PI; +#ifdef RTE_KNI_VHOST_VNET_HDR_EN + u |= IFF_VNET_HDR; +#endif + if (put_user(u, up)) + return -EFAULT; + return 0; + + case TUNSETSNDBUF: + KNI_DBG("TUNSETSNDBUF\n"); + if (get_user(u, up)) + return -EFAULT; + + q->sk.sk_sndbuf = u; + return 0; + + case TUNGETVNETHDRSZ: + s = q->vnet_hdr_sz; + if (put_user(s, sp)) + return -EFAULT; + KNI_DBG("TUNGETVNETHDRSZ %d\n", s); + return 0; + + case TUNSETVNETHDRSZ: + if (get_user(s, sp)) + return -EFAULT; + if (s < (int)sizeof(struct virtio_net_hdr)) + return -EINVAL; + + KNI_DBG("TUNSETVNETHDRSZ %d\n", s); + q->vnet_hdr_sz = s; + return 0; + + case TUNSETOFFLOAD: + KNI_DBG("TUNSETOFFLOAD %lx\n", arg); +#ifdef RTE_KNI_VHOST_VNET_HDR_EN + /* not support any offload yet */ + if (!(q->flags & IFF_VNET_HDR)) + return -EINVAL; + + return 0; +#else + return -EINVAL; +#endif + + default: + KNI_DBG("NOT SUPPORT\n"); + return -EINVAL; + } +} + +static int +kni_sock_compat_ioctl(struct socket *sock, unsigned int cmd, + unsigned long arg) +{ + /* 32 bits app on 64 bits OS to be supported later */ + KNI_PRINT("Not implemented.\n"); + + return -EINVAL; +} + +#define KNI_VHOST_WAIT_WQ_SAFE() \ +do { \ + while ((BE_FINISH | BE_STOP) == kni->vq_status) \ + msleep(1); \ +}while(0) \ + + +static int +kni_sock_release(struct socket *sock) +{ + struct kni_vhost_queue *q = + container_of(sock->sk, struct kni_vhost_queue, sk); + struct kni_dev *kni; + + if (q == NULL) + return 0; + + if (NULL != (kni = q->kni)) { + kni->vq_status = BE_STOP; + KNI_VHOST_WAIT_WQ_SAFE(); + kni->vhost_queue = NULL; + q->kni = NULL; + } + + if (q->sockfd != -1) + q->sockfd = -1; + + sk_set_socket(&q->sk, NULL); + sock->sk = NULL; + + sock_put(&q->sk); + + KNI_DBG("dummy sock release done\n"); + + return 0; +} + +int +kni_sock_getname (struct socket *sock, + struct sockaddr *addr, + int *sockaddr_len, int peer) +{ + KNI_DBG("dummy sock getname\n"); + ((struct sockaddr_ll*)addr)->sll_family = AF_PACKET; + return 0; +} + +static const struct proto_ops kni_socket_ops = { + .getname = kni_sock_getname, + .sendmsg = kni_sock_sndmsg, + .recvmsg = kni_sock_rcvmsg, + .release = kni_sock_release, + .poll = kni_sock_poll, + .ioctl = kni_sock_ioctl, + .compat_ioctl = kni_sock_compat_ioctl, +}; + +static void +kni_sk_write_space(struct sock *sk) +{ + wait_queue_head_t *wqueue; + + if (!sock_writeable(sk) || +#ifdef SOCKWQ_ASYNC_NOSPACE + !test_and_clear_bit(SOCKWQ_ASYNC_NOSPACE, &sk->sk_socket->flags)) +#else + !test_and_clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags)) +#endif + return; + wqueue = sk_sleep(sk); + if (wqueue && waitqueue_active(wqueue)) + wake_up_interruptible_poll( + wqueue, POLLOUT | POLLWRNORM | POLLWRBAND); +} + +static void +kni_sk_destruct(struct sock *sk) +{ + struct kni_vhost_queue *q = + container_of(sk, struct kni_vhost_queue, sk); + + if (!q) + return; + + /* make sure there's no packet in buffer */ + while (skb_dequeue(&sk->sk_receive_queue) != NULL) + ; + + mb(); + + if (q->fifo != NULL) { + kfree(q->fifo); + q->fifo = NULL; + } + + if (q->cache != NULL) { + kfree(q->cache); + q->cache = NULL; + } +} + +static int +kni_vhost_backend_init(struct kni_dev *kni) +{ + struct kni_vhost_queue *q; + struct net *net = current->nsproxy->net_ns; + int err, i, sockfd; + struct rte_kni_fifo *fifo; + struct sk_buff *elem; + + if (kni->vhost_queue != NULL) + return -1; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 2, 0) + q = (struct kni_vhost_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL, + &kni_raw_proto, 0); +#else + q = (struct kni_vhost_queue *)sk_alloc(net, AF_UNSPEC, GFP_KERNEL, + &kni_raw_proto); +#endif + if (!q) + return -ENOMEM; + + err = sock_create_lite(AF_UNSPEC, SOCK_RAW, IPPROTO_RAW, &q->sock); + if (err) + goto free_sk; + + sockfd = kni_sock_map_fd(q->sock); + if (sockfd < 0) { + err = sockfd; + goto free_sock; + } + + /* cache init */ + q->cache = kzalloc(RTE_KNI_VHOST_MAX_CACHE_SIZE * sizeof(struct sk_buff), + GFP_KERNEL); + if (!q->cache) + goto free_fd; + + fifo = kzalloc(RTE_KNI_VHOST_MAX_CACHE_SIZE * sizeof(void *) + + sizeof(struct rte_kni_fifo), GFP_KERNEL); + if (!fifo) + goto free_cache; + + kni_fifo_init(fifo, RTE_KNI_VHOST_MAX_CACHE_SIZE); + + for (i = 0; i < RTE_KNI_VHOST_MAX_CACHE_SIZE; i++) { + elem = &q->cache[i]; + kni_fifo_put(fifo, (void**)&elem, 1); + } + q->fifo = fifo; + + /* store sockfd in vhost_queue */ + q->sockfd = sockfd; + + /* init socket */ + q->sock->type = SOCK_RAW; + q->sock->state = SS_CONNECTED; + q->sock->ops = &kni_socket_ops; + sock_init_data(q->sock, &q->sk); + + /* init sock data */ + q->sk.sk_write_space = kni_sk_write_space; + q->sk.sk_destruct = kni_sk_destruct; + q->flags = IFF_NO_PI | IFF_TAP; + q->vnet_hdr_sz = sizeof(struct virtio_net_hdr); +#ifdef RTE_KNI_VHOST_VNET_HDR_EN + q->flags |= IFF_VNET_HDR; +#endif + + /* bind kni_dev with vhost_queue */ + q->kni = kni; + kni->vhost_queue = q; + + wmb(); + + kni->vq_status = BE_START; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,35) + KNI_DBG("backend init sockfd=%d, sock->wq=0x%16llx," + "sk->sk_wq=0x%16llx", + q->sockfd, (uint64_t)q->sock->wq, + (uint64_t)q->sk.sk_wq); +#else + KNI_DBG("backend init sockfd=%d, sock->wait at 0x%16llx," + "sk->sk_sleep=0x%16llx", + q->sockfd, (uint64_t)&q->sock->wait, + (uint64_t)q->sk.sk_sleep); +#endif + + return 0; + +free_cache: + kfree(q->cache); + q->cache = NULL; + +free_fd: + put_unused_fd(sockfd); + +free_sock: + q->kni = NULL; + kni->vhost_queue = NULL; + kni->vq_status |= BE_FINISH; + sock_release(q->sock); + q->sock->ops = NULL; + q->sock = NULL; + +free_sk: + sk_free((struct sock*)q); + + return err; +} + +/* kni vhost sock sysfs */ +static ssize_t +show_sock_fd(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct net_device *net_dev = container_of(dev, struct net_device, dev); + struct kni_dev *kni = netdev_priv(net_dev); + int sockfd = -1; + if (kni->vhost_queue != NULL) + sockfd = kni->vhost_queue->sockfd; + return snprintf(buf, 10, "%d\n", sockfd); +} + +static ssize_t +show_sock_en(struct device *dev, struct device_attribute *attr, + char *buf) +{ + struct net_device *net_dev = container_of(dev, struct net_device, dev); + struct kni_dev *kni = netdev_priv(net_dev); + return snprintf(buf, 10, "%u\n", (kni->vhost_queue == NULL ? 0 : 1)); +} + +static ssize_t +set_sock_en(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct net_device *net_dev = container_of(dev, struct net_device, dev); + struct kni_dev *kni = netdev_priv(net_dev); + unsigned long en; + int err = 0; + + if (0 != kstrtoul(buf, 0, &en)) + return -EINVAL; + + if (en) + err = kni_vhost_backend_init(kni); + + return err ? err : count; +} + +static DEVICE_ATTR(sock_fd, S_IRUGO | S_IRUSR, show_sock_fd, NULL); +static DEVICE_ATTR(sock_en, S_IRUGO | S_IWUSR, show_sock_en, set_sock_en); +static struct attribute *dev_attrs[] = { + &dev_attr_sock_fd.attr, + &dev_attr_sock_en.attr, + NULL, +}; + +static const struct attribute_group dev_attr_grp = { + .attrs = dev_attrs, +}; + +int +kni_vhost_backend_release(struct kni_dev *kni) +{ + struct kni_vhost_queue *q = kni->vhost_queue; + + if (q == NULL) + return 0; + + /* dettach from kni */ + q->kni = NULL; + + KNI_DBG("release backend done\n"); + + return 0; +} + +int +kni_vhost_init(struct kni_dev *kni) +{ + struct net_device *dev = kni->net_dev; + + if (sysfs_create_group(&dev->dev.kobj, &dev_attr_grp)) + sysfs_remove_group(&dev->dev.kobj, &dev_attr_grp); + + kni->vq_status = BE_STOP; + + KNI_DBG("kni_vhost_init done\n"); + + return 0; +} diff --git a/lib/librte_eal/linuxapp/xen_dom0/Makefile b/lib/librte_eal/linuxapp/xen_dom0/Makefile new file mode 100644 index 00000000..9d22fb97 --- /dev/null +++ b/lib/librte_eal/linuxapp/xen_dom0/Makefile @@ -0,0 +1,56 @@ +# BSD LICENSE +# +# Copyright(c) 2010-2014 Intel Corporation. All rights reserved. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include $(RTE_SDK)/mk/rte.vars.mk + +# +# module name and path +# +MODULE = rte_dom0_mm + +# +# CFLAGS +# +MODULE_CFLAGS += -I$(SRCDIR) --param max-inline-insns-single=50 +MODULE_CFLAGS += -I$(RTE_OUTPUT)/include +MODULE_CFLAGS += -include $(RTE_OUTPUT)/include/rte_config.h +MODULE_CFLAGS += -Wall -Werror + +# this lib needs main eal +DEPDIRS-y += lib/librte_eal/linuxapp/eal + +# +# all source are stored in SRCS-y +# + +SRCS-y += dom0_mm_misc.c + +include $(RTE_SDK)/mk/rte.module.mk diff --git a/lib/librte_eal/linuxapp/xen_dom0/compat.h b/lib/librte_eal/linuxapp/xen_dom0/compat.h new file mode 100644 index 00000000..e6eb97f2 --- /dev/null +++ b/lib/librte_eal/linuxapp/xen_dom0/compat.h @@ -0,0 +1,15 @@ +/* + * Minimal wrappers to allow compiling xen_dom0 on older kernels. + */ + +#ifndef RHEL_RELEASE_VERSION +#define RHEL_RELEASE_VERSION(a, b) (((a) << 8) + (b)) +#endif + +#if LINUX_VERSION_CODE < KERNEL_VERSION(2, 6, 39) && \ + (!(defined(RHEL_RELEASE_CODE) && \ + RHEL_RELEASE_CODE >= RHEL_RELEASE_VERSION(6, 4))) + +#define kstrtoul strict_strtoul + +#endif /* < 2.6.39 */ diff --git a/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_dev.h b/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_dev.h new file mode 100644 index 00000000..9d5ffb22 --- /dev/null +++ b/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_dev.h @@ -0,0 +1,107 @@ +/*- + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * The full GNU General Public License is included in this distribution + * in the file called LICENSE.GPL. + * + * Contact Information: + * Intel Corporation + * + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ +#ifndef _DOM0_MM_DEV_H_ +#define _DOM0_MM_DEV_H_ + +#include +#include +#include +#include +#include + +#define NUM_MEM_CTX 256 /**< Maximum number of memory context*/ +#define MAX_EXCHANGE_FAIL_TIME 5 /**< Maximum times of allowing exchange fail .*/ +#define MAX_MEMBLOCK_SIZE (2 * DOM0_MEMBLOCK_SIZE) +#define MAX_NUM_ORDER (DOM0_CONTIG_NUM_ORDER + 1) +#define SIZE_PER_BLOCK 2 /**< Size of memory block (2MB).*/ + +/** + * A structure describing the private information for a dom0 device. + */ +struct dom0_mm_dev { + struct miscdevice miscdev; + uint8_t fail_times; + uint32_t used_memsize; + uint32_t num_mem_ctx; + uint32_t config_memsize; + uint32_t num_bigblock; + struct dom0_mm_data *mm_data[NUM_MEM_CTX]; + struct mutex data_lock; +}; + +struct dom0_mm_data{ + uint32_t refcnt; + uint32_t num_memseg; /**< Number of memory segment. */ + uint32_t mem_size; /**< Size of requesting memory. */ + + char name[DOM0_NAME_MAX]; + + /** Store global memory block IDs used by an instance */ + uint32_t block_num[DOM0_NUM_MEMBLOCK]; + + /** Store memory block information.*/ + struct memblock_info block_info[DOM0_NUM_MEMBLOCK]; + + /** Store memory segment information.*/ + struct memseg_info seg_info[DOM0_NUM_MEMSEG]; +}; + +#define XEN_ERR(args...) printk(KERN_DEBUG "XEN_DOM0: Error: " args) +#define XEN_PRINT(args...) printk(KERN_DEBUG "XEN_DOM0: " args) +#endif diff --git a/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_misc.c b/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_misc.c new file mode 100644 index 00000000..79630bad --- /dev/null +++ b/lib/librte_eal/linuxapp/xen_dom0/dom0_mm_misc.c @@ -0,0 +1,780 @@ +/*- + * This file is provided under a dual BSD/GPLv2 license. When using or + * redistributing this file, you may do so under either license. + * + * GPL LICENSE SUMMARY + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA. + * The full GNU General Public License is included in this distribution + * in the file called LICENSE.GPL. + * + * Contact Information: + * Intel Corporation + * + * BSD LICENSE + * + * Copyright(c) 2010-2014 Intel Corporation. All rights reserved. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in + * the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Intel Corporation nor the names of its + * contributors may be used to endorse or promote products derived + * from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include "compat.h" +#include "dom0_mm_dev.h" + +MODULE_LICENSE("Dual BSD/GPL"); +MODULE_AUTHOR("Intel Corporation"); +MODULE_DESCRIPTION("Kernel Module for supporting DPDK running on Xen Dom0"); + +static struct dom0_mm_dev dom0_dev; +static struct kobject *dom0_kobj = NULL; + +static struct memblock_info *rsv_mm_info; + +/* Default configuration for reserved memory size(2048 MB). */ +static uint32_t rsv_memsize = 2048; + +static int dom0_open(struct inode *inode, struct file *file); +static int dom0_release(struct inode *inode, struct file *file); +static int dom0_ioctl(struct file *file, unsigned int ioctl_num, + unsigned long ioctl_param); +static int dom0_mmap(struct file *file, struct vm_area_struct *vma); +static int dom0_memory_free(uint32_t size); +static int dom0_memory_release(struct dom0_mm_data *mm_data); + +static const struct file_operations data_fops = { + .owner = THIS_MODULE, + .open = dom0_open, + .release = dom0_release, + .mmap = dom0_mmap, + .unlocked_ioctl = (void *)dom0_ioctl, +}; + +static ssize_t +show_memsize_rsvd(struct device *dev, struct device_attribute *attr, char *buf) +{ + return snprintf(buf, 10, "%u\n", dom0_dev.used_memsize); +} + +static ssize_t +show_memsize(struct device *dev, struct device_attribute *attr, char *buf) +{ + return snprintf(buf, 10, "%u\n", dom0_dev.config_memsize); +} + +static ssize_t +store_memsize(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + int err = 0; + unsigned long mem_size; + + if (0 != kstrtoul(buf, 0, &mem_size)) + return -EINVAL; + + mutex_lock(&dom0_dev.data_lock); + if (0 == mem_size) { + err = -EINVAL; + goto fail; + } else if (mem_size > (rsv_memsize - dom0_dev.used_memsize)) { + XEN_ERR("configure memory size fail\n"); + err = -EINVAL; + goto fail; + } else + dom0_dev.config_memsize = mem_size; + +fail: + mutex_unlock(&dom0_dev.data_lock); + return err ? err : count; +} + +static DEVICE_ATTR(memsize, S_IRUGO | S_IWUSR, show_memsize, store_memsize); +static DEVICE_ATTR(memsize_rsvd, S_IRUGO, show_memsize_rsvd, NULL); + +static struct attribute *dev_attrs[] = { + &dev_attr_memsize.attr, + &dev_attr_memsize_rsvd.attr, + NULL, +}; + +/* the memory size unit is MB */ +static const struct attribute_group dev_attr_grp = { + .name = "memsize-mB", + .attrs = dev_attrs, +}; + + +static void +sort_viraddr(struct memblock_info *mb, int cnt) +{ + int i,j; + uint64_t tmp_pfn; + uint64_t tmp_viraddr; + + /*sort virtual address and pfn */ + for(i = 0; i < cnt; i ++) { + for(j = cnt - 1; j > i; j--) { + if(mb[j].pfn < mb[j - 1].pfn) { + tmp_pfn = mb[j - 1].pfn; + mb[j - 1].pfn = mb[j].pfn; + mb[j].pfn = tmp_pfn; + + tmp_viraddr = mb[j - 1].vir_addr; + mb[j - 1].vir_addr = mb[j].vir_addr; + mb[j].vir_addr = tmp_viraddr; + } + } + } +} + +static int +dom0_find_memdata(const char * mem_name) +{ + unsigned i; + int idx = -1; + for(i = 0; i< NUM_MEM_CTX; i++) { + if(dom0_dev.mm_data[i] == NULL) + continue; + if (!strncmp(dom0_dev.mm_data[i]->name, mem_name, + sizeof(char) * DOM0_NAME_MAX)) { + idx = i; + break; + } + } + + return idx; +} + +static int +dom0_find_mempos(void) +{ + unsigned i; + int idx = -1; + + for(i = 0; i< NUM_MEM_CTX; i++) { + if(dom0_dev.mm_data[i] == NULL){ + idx = i; + break; + } + } + + return idx; +} + +static int +dom0_memory_release(struct dom0_mm_data *mm_data) +{ + int idx; + uint32_t num_block, block_id; + + /* each memory block is 2M */ + num_block = mm_data->mem_size / SIZE_PER_BLOCK; + if (num_block == 0) + return -EINVAL; + + /* reset global memory data */ + idx = dom0_find_memdata(mm_data->name); + if (idx >= 0) { + dom0_dev.used_memsize -= mm_data->mem_size; + dom0_dev.mm_data[idx] = NULL; + dom0_dev.num_mem_ctx--; + } + + /* reset these memory blocks status as free */ + for (idx = 0; idx < num_block; idx++) { + block_id = mm_data->block_num[idx]; + rsv_mm_info[block_id].used = 0; + } + + memset(mm_data, 0, sizeof(struct dom0_mm_data)); + vfree(mm_data); + return 0; +} + +static int +dom0_memory_free(uint32_t rsv_size) +{ + uint64_t vstart, vaddr; + uint32_t i, num_block, size; + + if (!xen_pv_domain()) + return -1; + + /* each memory block is 2M */ + num_block = rsv_size / SIZE_PER_BLOCK; + if (num_block == 0) + return -EINVAL; + + /* free all memory blocks of size of 4M and destroy contiguous region */ + for (i = 0; i < dom0_dev.num_bigblock * 2; i += 2) { + vstart = rsv_mm_info[i].vir_addr; + if (vstart) { + #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 13, 0) + if (rsv_mm_info[i].exchange_flag) + xen_destroy_contiguous_region(vstart, + DOM0_CONTIG_NUM_ORDER); + if (rsv_mm_info[i + 1].exchange_flag) + xen_destroy_contiguous_region(vstart + + DOM0_MEMBLOCK_SIZE, + DOM0_CONTIG_NUM_ORDER); + #else + if (rsv_mm_info[i].exchange_flag) + xen_destroy_contiguous_region(rsv_mm_info[i].pfn + * PAGE_SIZE, + DOM0_CONTIG_NUM_ORDER); + if (rsv_mm_info[i + 1].exchange_flag) + xen_destroy_contiguous_region(rsv_mm_info[i].pfn + * PAGE_SIZE + DOM0_MEMBLOCK_SIZE, + DOM0_CONTIG_NUM_ORDER); + #endif + + size = DOM0_MEMBLOCK_SIZE * 2; + vaddr = vstart; + while (size > 0) { + ClearPageReserved(virt_to_page(vaddr)); + vaddr += PAGE_SIZE; + size -= PAGE_SIZE; + } + free_pages(vstart, MAX_NUM_ORDER); + } + } + + /* free all memory blocks size of 2M and destroy contiguous region */ + for (; i < num_block; i++) { + vstart = rsv_mm_info[i].vir_addr; + if (vstart) { + if (rsv_mm_info[i].exchange_flag) + xen_destroy_contiguous_region(vstart, + DOM0_CONTIG_NUM_ORDER); + + size = DOM0_MEMBLOCK_SIZE; + vaddr = vstart; + while (size > 0) { + ClearPageReserved(virt_to_page(vaddr)); + vaddr += PAGE_SIZE; + size -= PAGE_SIZE; + } + free_pages(vstart, DOM0_CONTIG_NUM_ORDER); + } + } + + memset(rsv_mm_info, 0, sizeof(struct memblock_info) * num_block); + vfree(rsv_mm_info); + rsv_mm_info = NULL; + + return 0; +} + +static void +find_free_memory(uint32_t count, struct dom0_mm_data *mm_data) +{ + uint32_t i = 0; + uint32_t j = 0; + + while ((i < count) && (j < rsv_memsize / SIZE_PER_BLOCK)) { + if (rsv_mm_info[j].used == 0) { + mm_data->block_info[i].pfn = rsv_mm_info[j].pfn; + mm_data->block_info[i].vir_addr = + rsv_mm_info[j].vir_addr; + mm_data->block_info[i].mfn = rsv_mm_info[j].mfn; + mm_data->block_info[i].exchange_flag = + rsv_mm_info[j].exchange_flag; + mm_data->block_num[i] = j; + rsv_mm_info[j].used = 1; + i++; + } + j++; + } +} + +/** + * Find all memory segments in which physical addresses are contiguous. + */ +static void +find_memseg(int count, struct dom0_mm_data * mm_data) +{ + int i = 0; + int j, k, idx = 0; + uint64_t zone_len, pfn, num_block; + + while(i < count) { + if (mm_data->block_info[i].exchange_flag == 0) { + i++; + continue; + } + k = 0; + pfn = mm_data->block_info[i].pfn; + mm_data->seg_info[idx].pfn = pfn; + mm_data->seg_info[idx].mfn[k] = mm_data->block_info[i].mfn; + + for (j = i + 1; j < count; j++) { + + /* ignore exchange fail memory block */ + if (mm_data->block_info[j].exchange_flag == 0) + break; + + if (mm_data->block_info[j].pfn != + (mm_data->block_info[j - 1].pfn + + DOM0_MEMBLOCK_SIZE / PAGE_SIZE)) + break; + ++k; + mm_data->seg_info[idx].mfn[k] = mm_data->block_info[j].mfn; + } + + num_block = j - i; + zone_len = num_block * DOM0_MEMBLOCK_SIZE; + mm_data->seg_info[idx].size = zone_len; + + XEN_PRINT("memseg id=%d, size=0x%llx\n", idx, zone_len); + i = i+ num_block; + idx++; + if (idx == DOM0_NUM_MEMSEG) + break; + } + mm_data->num_memseg = idx; +} + +static int +dom0_memory_reserve(uint32_t rsv_size) +{ + uint64_t pfn, vstart, vaddr; + uint32_t i, num_block, size, allocated_size = 0; + +#if LINUX_VERSION_CODE >= KERNEL_VERSION(3, 13, 0) + dma_addr_t dma_handle; +#endif + + /* 2M as memory block */ + num_block = rsv_size / SIZE_PER_BLOCK; + + rsv_mm_info = vmalloc(sizeof(struct memblock_info) * num_block); + if (!rsv_mm_info) { + XEN_ERR("Unable to allocate device memory information\n"); + return -ENOMEM; + } + memset(rsv_mm_info, 0, sizeof(struct memblock_info) * num_block); + + /* try alloc size of 4M once */ + for (i = 0; i < num_block; i += 2) { + vstart = (unsigned long) + __get_free_pages(GFP_ATOMIC, MAX_NUM_ORDER); + if (vstart == 0) + break; + + dom0_dev.num_bigblock = i / 2 + 1; + allocated_size = SIZE_PER_BLOCK * (i + 2); + + /* size of 4M */ + size = DOM0_MEMBLOCK_SIZE * 2; + + vaddr = vstart; + while (size > 0) { + SetPageReserved(virt_to_page(vaddr)); + vaddr += PAGE_SIZE; + size -= PAGE_SIZE; + } + + pfn = virt_to_pfn(vstart); + rsv_mm_info[i].pfn = pfn; + rsv_mm_info[i].vir_addr = vstart; + rsv_mm_info[i + 1].pfn = + pfn + DOM0_MEMBLOCK_SIZE / PAGE_SIZE; + rsv_mm_info[i + 1].vir_addr = + vstart + DOM0_MEMBLOCK_SIZE; + } + + /*if it failed to alloc 4M, and continue to alloc 2M once */ + for (; i < num_block; i++) { + vstart = (unsigned long) + __get_free_pages(GFP_ATOMIC, DOM0_CONTIG_NUM_ORDER); + if (vstart == 0) { + XEN_ERR("allocate memory fail.\n"); + dom0_memory_free(allocated_size); + return -ENOMEM; + } + + allocated_size += SIZE_PER_BLOCK; + + size = DOM0_MEMBLOCK_SIZE; + vaddr = vstart; + while (size > 0) { + SetPageReserved(virt_to_page(vaddr)); + vaddr += PAGE_SIZE; + size -= PAGE_SIZE; + } + pfn = virt_to_pfn(vstart); + rsv_mm_info[i].pfn = pfn; + rsv_mm_info[i].vir_addr = vstart; + } + + sort_viraddr(rsv_mm_info, num_block); + + for (i = 0; i< num_block; i++) { + + /* + * This API is used to exchage MFN for getting a block of + * contiguous physical addresses, its maximum size is 2M. + */ + #if LINUX_VERSION_CODE < KERNEL_VERSION(3, 13, 0) + if (xen_create_contiguous_region(rsv_mm_info[i].vir_addr, + DOM0_CONTIG_NUM_ORDER, 0) == 0) { + #else + if (xen_create_contiguous_region(rsv_mm_info[i].pfn * PAGE_SIZE, + DOM0_CONTIG_NUM_ORDER, 0, &dma_handle) == 0) { + #endif + rsv_mm_info[i].exchange_flag = 1; + rsv_mm_info[i].mfn = + pfn_to_mfn(rsv_mm_info[i].pfn); + rsv_mm_info[i].used = 0; + } else { + XEN_ERR("exchange memeory fail\n"); + rsv_mm_info[i].exchange_flag = 0; + dom0_dev.fail_times++; + if (dom0_dev.fail_times > MAX_EXCHANGE_FAIL_TIME) { + dom0_memory_free(rsv_size); + return -EFAULT; + } + } + } + + return 0; +} + +static int +dom0_prepare_memsegs(struct memory_info *meminfo, struct dom0_mm_data *mm_data) +{ + uint32_t num_block; + int idx; + + /* check if there is a free name buffer */ + memcpy(mm_data->name, meminfo->name, DOM0_NAME_MAX); + mm_data->name[DOM0_NAME_MAX - 1] = '\0'; + idx = dom0_find_mempos(); + if (idx < 0) + return -1; + + num_block = meminfo->size / SIZE_PER_BLOCK; + /* find free memory and new memory segments*/ + find_free_memory(num_block, mm_data); + find_memseg(num_block, mm_data); + + /* update private memory data */ + mm_data->refcnt++; + mm_data->mem_size = meminfo->size; + + /* update global memory data */ + dom0_dev.mm_data[idx] = mm_data; + dom0_dev.num_mem_ctx++; + dom0_dev.used_memsize += mm_data->mem_size; + + return 0; +} + +static int +dom0_check_memory (struct memory_info *meminfo) +{ + int idx; + uint64_t mem_size; + + /* round memory size to the next even number. */ + if (meminfo->size % 2) + ++meminfo->size; + + mem_size = meminfo->size; + if (dom0_dev.num_mem_ctx > NUM_MEM_CTX) { + XEN_ERR("Memory data space is full in Dom0 driver\n"); + return -1; + } + idx = dom0_find_memdata(meminfo->name); + if (idx >= 0) { + XEN_ERR("Memory data name %s has already exsited in Dom0 driver.\n", + meminfo->name); + return -1; + } + if ((dom0_dev.used_memsize + mem_size) > rsv_memsize) { + XEN_ERR("Total size can't be larger than reserved size.\n"); + return -1; + } + + return 0; +} + +static int __init +dom0_init(void) +{ + if (!xen_domain()) + return -ENODEV; + + if (rsv_memsize > DOM0_CONFIG_MEMSIZE) { + XEN_ERR("The reserved memory size cannot be greater than %d\n", + DOM0_CONFIG_MEMSIZE); + return -EINVAL; + } + + /* Setup the misc device */ + dom0_dev.miscdev.minor = MISC_DYNAMIC_MINOR; + dom0_dev.miscdev.name = "dom0_mm"; + dom0_dev.miscdev.fops = &data_fops; + + /* register misc char device */ + if (misc_register(&dom0_dev.miscdev) != 0) { + XEN_ERR("Misc device registration failed\n"); + return -EPERM; + } + + mutex_init(&dom0_dev.data_lock); + dom0_kobj = kobject_create_and_add("dom0-mm", mm_kobj); + + if (!dom0_kobj) { + XEN_ERR("dom0-mm object creation failed\n"); + misc_deregister(&dom0_dev.miscdev); + return -ENOMEM; + } + + if (sysfs_create_group(dom0_kobj, &dev_attr_grp)) { + kobject_put(dom0_kobj); + misc_deregister(&dom0_dev.miscdev); + return -EPERM; + } + + if (dom0_memory_reserve(rsv_memsize) < 0) { + sysfs_remove_group(dom0_kobj, &dev_attr_grp); + kobject_put(dom0_kobj); + misc_deregister(&dom0_dev.miscdev); + return -ENOMEM; + } + + XEN_PRINT("####### DPDK Xen Dom0 module loaded #######\n"); + + return 0; +} + +static void __exit +dom0_exit(void) +{ + if (rsv_mm_info != NULL) + dom0_memory_free(rsv_memsize); + + sysfs_remove_group(dom0_kobj, &dev_attr_grp); + kobject_put(dom0_kobj); + misc_deregister(&dom0_dev.miscdev); + + XEN_PRINT("####### DPDK Xen Dom0 module unloaded #######\n"); +} + +static int +dom0_open(struct inode *inode, struct file *file) +{ + file->private_data = NULL; + + XEN_PRINT(KERN_INFO "/dev/dom0_mm opened\n"); + return 0; +} + +static int +dom0_release(struct inode *inode, struct file *file) +{ + int ret = 0; + struct dom0_mm_data *mm_data = file->private_data; + + if (mm_data == NULL) + return ret; + + mutex_lock(&dom0_dev.data_lock); + if (--mm_data->refcnt == 0) + ret = dom0_memory_release(mm_data); + mutex_unlock(&dom0_dev.data_lock); + + file->private_data = NULL; + XEN_PRINT(KERN_INFO "/dev/dom0_mm closed\n"); + return ret; +} + +static int +dom0_mmap(struct file *file, struct vm_area_struct *vm) +{ + int status = 0; + uint32_t idx = vm->vm_pgoff; + uint64_t pfn, size = vm->vm_end - vm->vm_start; + struct dom0_mm_data *mm_data = file->private_data; + + if(mm_data == NULL) + return -EINVAL; + + mutex_lock(&dom0_dev.data_lock); + if (idx >= mm_data->num_memseg) { + mutex_unlock(&dom0_dev.data_lock); + return -EINVAL; + } + + if (size > mm_data->seg_info[idx].size){ + mutex_unlock(&dom0_dev.data_lock); + return -EINVAL; + } + + XEN_PRINT("mmap memseg idx =%d,size = 0x%llx\n", idx, size); + + pfn = mm_data->seg_info[idx].pfn; + mutex_unlock(&dom0_dev.data_lock); + + status = remap_pfn_range(vm, vm->vm_start, pfn, size, PAGE_SHARED); + + return status; +} +static int +dom0_ioctl(struct file *file, + unsigned int ioctl_num, + unsigned long ioctl_param) +{ + int idx, ret; + char name[DOM0_NAME_MAX] = {0}; + struct memory_info meminfo; + struct dom0_mm_data *mm_data = file->private_data; + + XEN_PRINT("IOCTL num=0x%0x param=0x%0lx \n", ioctl_num, ioctl_param); + + /** + * Switch according to the ioctl called + */ + switch _IOC_NR(ioctl_num) { + case _IOC_NR(RTE_DOM0_IOCTL_PREPARE_MEMSEG): + ret = copy_from_user(&meminfo, (void *)ioctl_param, + sizeof(struct memory_info)); + if (ret) + return -EFAULT; + + if (mm_data != NULL) { + XEN_ERR("Cannot create memory segment for the same" + " file descriptor\n"); + return -EINVAL; + } + + /* Allocate private data */ + mm_data = vmalloc(sizeof(struct dom0_mm_data)); + if (!mm_data) { + XEN_ERR("Unable to allocate device private data\n"); + return -ENOMEM; + } + memset(mm_data, 0, sizeof(struct dom0_mm_data)); + + mutex_lock(&dom0_dev.data_lock); + /* check if we can allocate memory*/ + if (dom0_check_memory(&meminfo) < 0) { + mutex_unlock(&dom0_dev.data_lock); + vfree(mm_data); + return -EINVAL; + } + + /* allocate memory and created memory segments*/ + if (dom0_prepare_memsegs(&meminfo, mm_data) < 0) { + XEN_ERR("create memory segment fail.\n"); + mutex_unlock(&dom0_dev.data_lock); + return -EIO; + } + + file->private_data = mm_data; + mutex_unlock(&dom0_dev.data_lock); + break; + + /* support multiple process in term of memory mapping*/ + case _IOC_NR(RTE_DOM0_IOCTL_ATTACH_TO_MEMSEG): + ret = copy_from_user(name, (void *)ioctl_param, + sizeof(char) * DOM0_NAME_MAX); + if (ret) + return -EFAULT; + + mutex_lock(&dom0_dev.data_lock); + idx = dom0_find_memdata(name); + if (idx < 0) { + mutex_unlock(&dom0_dev.data_lock); + return -EINVAL; + } + + mm_data = dom0_dev.mm_data[idx]; + mm_data->refcnt++; + file->private_data = mm_data; + mutex_unlock(&dom0_dev.data_lock); + break; + + case _IOC_NR(RTE_DOM0_IOCTL_GET_NUM_MEMSEG): + ret = copy_to_user((void *)ioctl_param, &mm_data->num_memseg, + sizeof(int)); + if (ret) + return -EFAULT; + break; + + case _IOC_NR(RTE_DOM0_IOCTL_GET_MEMSEG_INFO): + ret = copy_to_user((void *)ioctl_param, + &mm_data->seg_info[0], + sizeof(struct memseg_info) * + mm_data->num_memseg); + if (ret) + return -EFAULT; + break; + default: + XEN_PRINT("IOCTL default \n"); + break; + } + + return 0; +} + +module_init(dom0_init); +module_exit(dom0_exit); + +module_param(rsv_memsize, uint, S_IRUGO | S_IWUSR); +MODULE_PARM_DESC(rsv_memsize, "Xen-dom0 reserved memory size(MB).\n"); -- cgit 1.2.3-korg